diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph new file mode 100644 index 0000000..7ea84fd --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 32 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file +#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph_1level b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph_1level new file mode 100644 index 0000000..c50947d --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph_1level @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 32 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file 
+#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/probin.3d.sph b/scaling/castro/sedov/summit_201905/CPU_runs/probin.3d.sph new file mode 100644 index 0000000..28bf051 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/probin.3d.sph @@ -0,0 +1,29 @@ +&fortin + + r_init = 0.01 + p_ambient = 1.d-5 + exp_energy = 1.0 + dens_ambient = 1.0 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + + eos_assume_neutral = T + +/ diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_16node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_16node_nogpu.sh new file mode 100644 index 0000000..e2c605a --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_16node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 10 +#BSUB -nnodes 16 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph + +n_mpi=96 # 16 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_32node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_32node_nogpu.sh new file mode 100644 index 0000000..64fa3c8 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_32node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 10 +#BSUB -nnodes 32 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=192 # 32 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_4node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_4node_nogpu.sh new file mode 100644 index 0000000..3e4a986 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_4node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 25 +#BSUB -nnodes 4 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=24 # 4 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_64node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_64node_nogpu.sh new file mode 100644 index 0000000..29ed037 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_64node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 25 +#BSUB -nnodes 64 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=384 # 64 
nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_8node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_8node_nogpu.sh new file mode 100644 index 0000000..1c0eb15 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_8node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 25 +#BSUB -nnodes 8 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=48 # 8 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph new file mode 100644 index 0000000..292a19f --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 8 # block factor in grid generation +amr.max_grid_size = 64 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file +#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph_1level b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph_1level new file mode 100644 index 0000000..459efe2 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph_1level @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC 
FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 8 # block factor in grid generation +amr.max_grid_size = 64 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file +#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/probin.3d.sph b/scaling/castro/sedov/summit_201905/GPU_runs/probin.3d.sph new file mode 100644 index 0000000..28bf051 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/probin.3d.sph @@ -0,0 +1,29 @@ +&fortin + + r_init = 0.01 + p_ambient = 1.d-5 + exp_energy = 1.0 + dens_ambient = 1.0 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + + eos_assume_neutral = T + +/ diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_16nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_16nodes.sh new file mode 100644 index 0000000..70148b8 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_16nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 30 +#BSUB -nnodes 16 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=96 # 16 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_32nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_32nodes.sh new file mode 100644 index 0000000..1f3b7c8 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_32nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 15 +#BSUB -nnodes 32 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=192 # 32 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_4nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_4nodes.sh new file mode 100644 index 0000000..bd1200d --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_4nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB
-W 15 +#BSUB -nnodes 4 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=24 # 4 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_64nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_64nodes.sh new file mode 100644 index 0000000..e3e4e74 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_64nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 15 +#BSUB -nnodes 64 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=384 # 64 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_8nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_8nodes.sh new file mode 100644 index 0000000..76f9be3 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_8nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 30 +#BSUB -nnodes 8 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=48 # 8 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/plot_scaling.py b/scaling/castro/sedov/summit_201905/plot_scaling.py new file mode 100644 index 0000000..3e3c355 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/plot_scaling.py @@ -0,0 +1,55 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.loadtxt("summit_scaling_may19.txt") + +for i in range (0,40,5): + #print(i) + nodes = data[i:i+5,0] + avg_zone = data[i:i+5,2] + + if i in [0,5,20,25]: + if data[i,1]==256 : #blue for 256^3 and orange for 512^3 when using gpu + color = "C0" + else: + color = "C1" + else: + if data[i,1]==256 : #skyblue for 256^3 and red for 512^3 when using MPI+OpenMP + color = "C9" + else: + color = "C3" + + + + if i in [5,15,25,35]: #amr triangle marker, no amr circle + marker ="^" + else: + marker ="o" + + #plt.scatter(nodes, avg_zone) + plt.plot(nodes,avg_zone, marker+color, ls=":") + +plt.xlabel("number of nodes") +plt.ylabel(r"Avg # of zones advanced/ $\mu$s") + +#legends +legs = [] +legnames = [] +legs.append(plt.Line2D((0,1),(0,0), color = "C0")) +legnames.append(r"$256^3$ gpu") +legs.append(plt.Line2D((0,1),(0,0), color = "C1")) +legnames.append(r"$512^3$ gpu") +legs.append(plt.Line2D((0,1),(0,0), color = "C9")) +legnames.append(r"$256^3$ MPI+OMP") +legs.append(plt.Line2D((0,1),(0,0), color = "C3")) +legnames.append(r"$512^3$ MPI+OMP") +legs.append(plt.Line2D((0,1),(0,0), color="k", + marker="o", markeredgecolor="k", markerfacecolor="k", linestyle="none")) +legnames.append("no AMR") +legs.append(plt.Line2D((0,1),(0,0), color="k", + marker="^", markeredgecolor="k", markerfacecolor="k", linestyle="none")) +legnames.append("base + one 4x level") + 
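+# the Line2D objects above are proxy artists for the legend: color encodes the
+# configuration (GPU vs MPI+OMP, 256^3 vs 512^3) and marker encodes AMR vs no AMR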
+plt.legend(legs, legnames, frameon=False, fontsize="8", numpoints=1, loc=0, ncol=3) + +plt.savefig("summit_sedov.png", dpi=150) diff --git a/scaling/castro/sedov/summit_201905/summit_scaling_may19.txt b/scaling/castro/sedov/summit_201905/summit_scaling_may19.txt new file mode 100644 index 0000000..3f00675 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/summit_scaling_may19.txt @@ -0,0 +1,70 @@ + + +#no AMR ---- gpu : 6 per node, amr.max_grid_size=64, blocking_factor=8 +#nodes grid size average # zones perusec + 4 256 61.312 + 8 256 89.165 + 16 256 154.102 + 32 256 230.673 + 64 256 313.496 + + +#1 x4 level ----- gpu +#nodes grid_size average # zones per usec + 4 256 42.297 + 8 256 54.026 + 16 256 70.412 + 32 256 86.652 + 64 256 98.515 + + +#no AMR ---- MPI+OpenMP : 6 MPI + 7 OpenMP per node +#nodes grid_size average # zones per usec + 4 256 3.790 + 8 256 6.826 + 16 256 11.335 + 32 256 21.504 + 64 256 22.354 + +#1 x4 level ----- MPI+OpenMP +#nodes grid_size average # zones per usec + 4 256 2.527 + 8 256 5.027 + 16 256 7.916 + 32 256 15.190 + 64 256 14.253 + +#no AMR ---- gpu : 6 per node, amr.max_grid_size=64, blocking_factor=8 +#nodes grid size average # zones perusec + 4 512 73.328 + 8 512 143.232 + 16 512 251.890 + 32 512 468.750 + 64 512 695.104 + + +#1 x4 level ----- gpu +#nodes grid_size average # zones per usec + 4 512 62.00 + 8 512 117.079 + 16 512 188.958 + 32 512 292.351 + 64 512 387.022 + + +#no AMR ---- MPI+OpenMP : 6 MPI + 7 OpenMP per node +#nodes grid_size average # zones per usec + 4 512 4.077 + 8 512 8.006 + 16 512 14.406 + 32 512 30.299 + 64 512 49.173 + +#1 x4 level ----- MPI+OpenMP +#nodes grid_size average # zones per usec + 4 512 3.307 + 8 512 6.203 + 16 512 12.366 + 32 512 22.265 + 64 512 41.535 + diff --git a/scaling/castro/sedov/titan_20171011/1level/avg_time.sh b/scaling/castro/sedov/titan_20171011/1level/avg_time.sh new file mode 100755 index 0000000..d5486dc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -5 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/sedov/titan_20171011/1level/inputs.starlord b/scaling/castro/sedov/titan_20171011/1level/inputs.starlord new file mode 100644 index 0000000..49f2021 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1024 1024 1024 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum 
level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 16 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level/probin.starlord b/scaling/castro/sedov/titan_20171011/1level/probin.starlord new file mode 100644 index 0000000..1dfabc0 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 3.0d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 1 + +/ + +&tagging + + denerr = 3 + dengrad = 1.e4 + max_denerr_lev = 0 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 1.e4 + max_presserr_lev = 0 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI1024-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI1024-PE16.run new file mode 100644 index 0000000..b25d5d4 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI1024-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:45:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP16-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP16-PE16.run new file mode 100644 index 0000000..962fe25 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP16-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=16 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 1 -d 16 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP2-PE16.run new file mode 100644 index 0000000..d3b396e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP4-PE16.run new file mode 100644 index 0000000..32ac1d1 
--- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP4-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP8-PE16.run new file mode 100644 index 0000000..44c8361 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP8-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-PE16.run new file mode 100644 index 0000000..27284b2 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..4a1a33b --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..167002d --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + 
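+# layout: 4096 MPI ranks x 4 OpenMP threads = 16384 cores = 1024 nodes at 16 cores
+# per node; -S 2 below places 2 ranks on each of the node's two 8-core NUMA dies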
+export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP8-PE16.run new file mode 100644 index 0000000..30e628a --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-PE16.run new file mode 100644 index 0000000..edca485 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/avg_time.sh b/scaling/castro/sedov/titan_20171011/1level_4x/avg_time.sh new file mode 100755 index 0000000..d5486dc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -5 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/inputs.starlord b/scaling/castro/sedov/titan_20171011/1level_4x/inputs.starlord new file mode 100644 index 0000000..3e26dba --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1024 1024 1024 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive 
steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 16 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/probin.starlord b/scaling/castro/sedov/titan_20171011/1level_4x/probin.starlord new file mode 100644 index 0000000..1dfabc0 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 3.0d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 1 + +/ + +&tagging + + denerr = 3 + dengrad = 1.e4 + max_denerr_lev = 0 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 1.e4 + max_presserr_lev = 0 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP2-PE16.run new file mode 100644 index 0000000..d3b396e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP4-PE16.run new file mode 100644 index 0000000..32ac1d1 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP4-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP8-PE16.run new file mode 100644 index 0000000..44c8361 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP8-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 
2048 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-PE16.run new file mode 100644 index 0000000..27284b2 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..4a1a33b --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..167002d --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP8-PE16.run new file mode 100644 index 0000000..30e628a --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-PE16.run new file mode 
100644 index 0000000..6ee9a43 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/inputs.starlord b/scaling/castro/sedov/titan_20171011/64_1024/inputs.starlord new file mode 100644 index 0000000..c7d7f30 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1024 1024 1024 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1024/probin.starlord b/scaling/castro/sedov/titan_20171011/64_1024/probin.starlord new file mode 100644 index 0000000..bfaca7e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 1.25d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI-PE16.run new file mode 100644 index 0000000..5038a7c --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit 
(since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI2-PE16.run new file mode 100644 index 0000000..5139169 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP2-PE16.run new file mode 100644 index 0000000..42881ec --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP4-PE16.run new file mode 100644 index 0000000..1318217 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP8-PE16.run new file mode 100644 index 0000000..2acaaeb --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-4096-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/inputs.starlord b/scaling/castro/sedov/titan_20171011/64_1536/inputs.starlord new file mode 100644 index 
0000000..29cf89a --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1536 1536 1536 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1536/probin.starlord b/scaling/castro/sedov/titan_20171011/64_1536/probin.starlord new file mode 100644 index 0000000..bfaca7e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 1.25d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI-PE16.run new file mode 100644 index 0000000..4175bea --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=864 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI2-PE16.run new file mode 100644 index 0000000..019a694 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=432 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit 
(since 2 PE share an FPU, we may want to reduce this) +aprun -n 6912 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP2-PE16.run new file mode 100644 index 0000000..16e4ed7 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=1728 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP4-PE16.run new file mode 100644 index 0000000..de39ceb --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=3456 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP8-PE16.run new file mode 100644 index 0000000..3531d2d --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=6912 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_512/avg_time.sh b/scaling/castro/sedov/titan_20171011/64_512/avg_time.sh new file mode 100755 index 0000000..d5486dc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -5 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/sedov/titan_20171011/64_512/inputs.starlord b/scaling/castro/sedov/titan_20171011/64_512/inputs.starlord new file mode 100644 index 0000000..c3beffc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 
1.0e9 1.0e9 +amr.n_cell = 512 512 512 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_512/probin.starlord b/scaling/castro/sedov/titan_20171011/64_512/probin.starlord new file mode 100644 index 0000000..bfaca7e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 1.25d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI-PE16.run new file mode 100644 index 0000000..fda4d92 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI2-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI2-PE16.run new file mode 100644 index 0000000..b23b3a7 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=16 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 256 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP2-PE16.run new file mode 100644 index 0000000..07dadbf --- /dev/null +++ 
b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP4-PE16.run new file mode 100644 index 0000000..2e78e9b --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP8-PE16.run new file mode 100644 index 0000000..d1b2aee --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/castro-sedov-scaling.txt b/scaling/castro/sedov/titan_20171011/castro-sedov-scaling.txt new file mode 100644 index 0000000..2546b0c --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/castro-sedov-scaling.txt @@ -0,0 +1,56 @@ +# this was run on 2017-10-07 using the Sedov in 3-d on +# titan with helmeos and inputs.starlord with the PGI 17.7 compilers. 
+# +# run for 10 steps and then average time for last 5 + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 256 1 256 16 64 512 0 27.3737 0.037914 + 512 1 512 32 64 512 0 13.797 0.0480446 + 512 2 1024 64 64 512 0 7.02318 0.0126529 + 512 4 2048 128 64 512 0 3.61333 0.00935958 + 512 8 4096 256 64 512 0 2.01249 0.0299526 + + 2048 1 2048 128 64 1024 0 27.4302 0.039496 + 4096 1 4096 256 64 1024 0 14.0757 0.276087 + 4096 2 8192 512 64 1024 0 7.11302 0.00464732 + 4096 4 16384 1024 64 1024 0 3.67513 0.00847975 + 4096 8 32768 2048 64 1024 0 2.05405 0.0142505 + + 6912 1 6912 432 64 1536 0 27.5459 0.100138 +13824 1 13824 864 64 1536 0 14.041 0.0896588 +13824 2 27648 1728 64 1536 0 7.15245 0.0197148 +13824 4 55296 3456 64 1536 0 3.7265 0.00461254 +13824 8 110592 6912 64 1536 0 2.1079 0.0216659 + + + +# 1 level, blocking_factor = 8 +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev +# 2048 1 2048 128 64 1024 1 62.9596 7.87975 +# 4096 1 4096 256 64 1024 1 44.4701 10.4288 +# 4096 2 8192 512 64 1024 1 23.5499 5.92599 +# 4096 4 16384 1024 64 1024 1 13.4561 3.67212 +# 4096 8 32768 2048 64 1024 1 8.52401 2.89531 + + +# 1 level, blocking_factor = 16 (can't compare these #s to above, since more zones with this blocking factor) +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 2048 1 2048 128 64 1024 1 78.0444 7.11191 + 2048 2 4096 256 64 1024 1 41.9756 3.8821 + 2048 4 8192 512 64 1024 1 24.5403 2.40322 + 2048 8 16384 1024 64 1024 1 14.2543 1.57262 + 2048 16 32768 2048 64 1024 1 8.70782 1.3139 +# 4096 1 4096 256 64 1024 1 51.487 13.5009 +# 4096 2 8192 512 64 1024 1 26.6659 7.14374 +# 4096 4 16384 1024 64 1024 1 14.9838 4.11424 +# 4096 8 32768 2048 64 1024 1 8.88012 2.59245 + + +# 1 4x level, blocking factor = 16 +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev +# 2048 1 2048 128 64 1024 1 OOM +# 4096 1 4096 256 64 1024 1 OOM + 4096 2 8192 512 64 1024 14 135.137 6.23752 + 4096 4 16384 1024 64 1024 14 79.4846 4.28278 + 4096 8 32768 2048 64 1024 14 47.4894 3.41423 + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_128.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_128.knl.MPI.OMP.slurm new file mode 100644 index 0000000..68cca56 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_128.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 128 +#SBATCH -C knl,quad,cache +#SBATCH -p debug +#SBATCH -t 00:20:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 2048 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_16.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_16.knl.MPI.OMP.slurm new file mode 100644 index 0000000..855e9fc --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_16.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 16 +#SBATCH -C knl,quad,cache +#SBATCH -p debug +#SBATCH -t 00:30:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 256 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_32.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_32.knl.MPI.OMP.slurm new file mode 100644 index 
0000000..b268f78 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_32.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 32 +#SBATCH -C knl,quad,cache +#SBATCH -p regular +#SBATCH -t 00:45:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 512 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_64.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_64.knl.MPI.OMP.slurm new file mode 100644 index 0000000..40133c7 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_64.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 64 +#SBATCH -C knl,quad,cache +#SBATCH -p debug +#SBATCH -t 00:20:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 1024 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_96.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_96.knl.MPI.OMP.slurm new file mode 100644 index 0000000..b87990d --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_96.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 96 +#SBATCH -C knl,quad,cache +#SBATCH -p regular +#SBATCH -t 01:30:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 1536 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.DEBUG.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/cori_201710/inputs_test_wdmerger_3D new file mode 100644 index 0000000..5355835 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/inputs_test_wdmerger_3D @@ -0,0 +1,119 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, 
y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 4 2 + +amr.max_grid_size = 32 48 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity 
after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 0 # Control verbosity in Castro.cpp + +castro.print_fortran_warnings = 0 + +gravity.v = 0 # Control verbosity in Gravity.cpp +mg.v = 0 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables + + diff --git a/scaling/castro/wdmerger/cori_201710/probin_test_wdmerger_3D b/scaling/castro/wdmerger/cori_201710/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/summitdev_201806/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/summitdev_201806/inputs_test_wdmerger_3D new file mode 100644 index 0000000..9a6b8ea --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/inputs_test_wdmerger_3D @@ -0,0 +1,117 @@ +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 5 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, 
y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 4 2 + +amr.max_grid_size = 32 48 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity 
after refluxing +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0           # Whether or not to output plotfiles +amr.checkpoint_files_output = 0     # Whether or not to output checkpoints + +amr.check_file = chk                # Root name of checkpoint file +amr.check_int = 10                  # Number of timesteps between checkpoints +amr.plot_file = plt                 # Root name of plot file +amr.plot_int = 10                   # Number of timesteps between plotfiles + +amr.v = 1                           # Control verbosity in Amr.cpp +castro.v = 0                        # Control verbosity in Castro.cpp + +castro.print_fortran_warnings = 0 + +gravity.v = 0                       # Control verbosity in Gravity.cpp +mg.v = 0                            # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE         # Don't compute any derived variables for the plotfiles + + diff --git a/scaling/castro/wdmerger/summitdev_201806/probin_test_wdmerger_3D b/scaling/castro/wdmerger/summitdev_201806/probin_test_wdmerger_3D new file mode 100644 index 0000000..4dbe0ca --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ + diff --git a/scaling/castro/wdmerger/summitdev_201806/scaling.txt b/scaling/castro/wdmerger/summitdev_201806/scaling.txt new file mode 100644 index 0000000..0b618b5 --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/scaling.txt @@ -0,0 +1,40 @@ + +obtained the average time using + +grep -i "Coarse TimeStep" nvme_test.o320568 | awk '{sum += $6; count +=1} END {print sum/count}' + +For 10 steps ********************** +amr.n_cell = 256, amr.max_level = 0 + +#nodes #cores #MPI #time + +4  80  80  11.8403 + +8  160  160  6.79126 + +16  320  320  9.65454 + +amr.n_cell = 256, amr.max_level = 1 + +4  86.9711 + +8  50.8912 + +16 + +amr.n_cell = 512, amr.max_level = 0 + +4  86.5123 + +8  43.7392 + +16 + +For 5 steps ************************** +amr.n_cell = 512, amr.max_level = 1 + +4 + +8  278.195 + +16 diff --git a/scaling/castro/wdmerger/summitdev_201806/summitd-160MPI.run b/scaling/castro/wdmerger/summitdev_201806/summitd-160MPI.run new file mode 100644 index 0000000..bd70ba7 --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/summitd-160MPI.run @@ -0,0 +1,12 @@ +#!/bin/bash +#BSUB -P AST106SUMDEV +#BSUB -J wdmerger-160-scale +#BSUB -o nvme_test.o%J +#BSUB -W 00:50 +#BSUB -nnodes 8 + +cd /lustre/atlas/scratch/$USER/ast106/wdmerger/512/1amr_8nodes_1thread + +export OMP_NUM_THREADS=1 + +jsrun -n 160 -a 1 -c 1 ./Castro3d.gnu.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/summitdev_201806/summitd-320MPI.run b/scaling/castro/wdmerger/summitdev_201806/summitd-320MPI.run new file mode 100644 index 0000000..f495997 --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/summitd-320MPI.run @@ -0,0 +1,12 @@ +#!/bin/bash +#BSUB -P AST106SUMDEV +#BSUB -J wdmerger-320-scale +#BSUB -o nvme_test.o%J +#BSUB -W 00:30 +#BSUB -nnodes 16 + +cd
/lustre/atlas/scratch/$USER/ast106/wdmerger/512/0amr_16nodes_1thread + +export OMP_NUM_THREADS=1 + +jsrun -n 320 -a 1 -c 1 ./Castro3d.gnu.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/summitdev_201806/summitd-80MPI.run b/scaling/castro/wdmerger/summitdev_201806/summitd-80MPI.run new file mode 100644 index 0000000..cacf2da --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/summitd-80MPI.run @@ -0,0 +1,12 @@ +#!/bin/bash +#BSUB -P AST106SUMDEV +#BSUB -J wdmerger-80-scale +#BSUB -o nvme_test.o%J +#BSUB -W 02:25 +#BSUB -nnodes 4 + +cd /lustre/atlas/scratch/$USER/ast106/wdmerger/512/1amr_4nodes_1thread + +export OMP_NUM_THREADS=1 + +jsrun -n 80 -a 1 -c 1 ./Castro3d.gnu.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_041516/README b/scaling/castro/wdmerger/titan_041516/README new file mode 100644 index 0000000..f154f13 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/README @@ -0,0 +1,10 @@ +Scaling data for the wdmerger problem on Titan on April 15, 2016. + +I ran tests with one, two, and three AMR levels (the higher levels refine +around the stars). For all three I ran in the 4 MPI ranks + 4 OMP threads +per node configuration, plus a few cases of 2 MPI + 8 OMP for the three level +test. All data is for 10 timesteps per run. + +For the highest number of processors on the two-level run I also used +the space-filling curve distribution mapping rather than my default +knapsack mapping, and this was about 15% faster. diff --git a/scaling/castro/wdmerger/titan_041516/one_level_omp4.txt b/scaling/castro/wdmerger/titan_041516/one_level_omp4.txt new file mode 100644 index 0000000..1c79d5e --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/one_level_omp4.txt @@ -0,0 +1,5 @@ + PROC AVG MIN MAX + 64 28.0 27.7 28.7 + 128 14.3 14.1 14.9 + 256 7.3 7.1 7.4 + diff --git a/scaling/castro/wdmerger/titan_041516/three_level_omp4.txt b/scaling/castro/wdmerger/titan_041516/three_level_omp4.txt new file mode 100644 index 0000000..cd53fb5 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/three_level_omp4.txt @@ -0,0 +1,6 @@ + PROC AVG MIN MAX + 2048 233.2 223.1 236.5 + 4096 125.8 120.4 127.8 + 8192 84.0 82.2 85.4 + 16384 86.4 82.9 90.3 + diff --git a/scaling/castro/wdmerger/titan_041516/three_level_omp8.txt b/scaling/castro/wdmerger/titan_041516/three_level_omp8.txt new file mode 100644 index 0000000..03a40c1 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/three_level_omp8.txt @@ -0,0 +1,5 @@ + PROC AVG MIN MAX + 2048 218.7 214.0 221.3 + 4096 141.0 133.5 144.2 + 8192 82.3 81.0 84.7 + diff --git a/scaling/castro/wdmerger/titan_041516/two_level_omp4.txt b/scaling/castro/wdmerger/titan_041516/two_level_omp4.txt new file mode 100644 index 0000000..6f0b704 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/two_level_omp4.txt @@ -0,0 +1,6 @@ + PROC AVG MIN MAX + 256 50.5 49.3 52.0 + 512 27.4 26.8 28.2 + 1024 16.1 15.6 16.8 + 2048 11.7 11.3 12.7 + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/256_0amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/inputs_test_wdmerger_3D 
b/scaling/castro/wdmerger/titan_20171011/256_0amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..9c437aa --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 256 256 256 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 2 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to 
evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_0amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git 
a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP2-PE16.run new file mode 100644 index 0000000..454edcb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP4-PE16.run new file mode 100644 index 0000000..e9094d0 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-PE16.run new file mode 100644 index 0000000..b5b66df --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI128-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI128-PE16.run new file mode 100644 index 0000000..56ac90f --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI128-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=8 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 128 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI2-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI2-PE16.run new file mode 100644 index 0000000..2a02574 --- /dev/null +++ 
b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N det-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=16 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 256 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI64-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI64-PE16.run new file mode 100644 index 0000000..fd5308e --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI64-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=4 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 64 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx2-OMP4-PE16.run new file mode 100644 index 0000000..a1affe7 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP4-PE16.run new file mode 100644 index 0000000..dad47d8 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP8-PE16.run new file mode 100644 index 0000000..199b67a --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l 
walltime=1:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx8-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx8-OMP8-PE16.run new file mode 100644 index 0000000..f0da840 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx8-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:00:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/256_1amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_1amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..4db2567 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # 
Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 256 256 256 # Number of cells on the coarse grid + +amr.max_level = 1 # Maximum level number allowed +amr.ref_ratio = 4 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off 
sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_1amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-OMP4-PE16.run new file mode 100644 index 0000000..e9094d0 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-PE16.run new file mode 100644 index 0000000..b5b66df --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI2-OMP4-PE16.run 
b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI2-OMP4-PE16.run new file mode 100644 index 0000000..c92f7cc --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 256 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx2-OMP4-PE16.run new file mode 100644 index 0000000..980af95 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx4-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx4-OMP4-PE16.run new file mode 100644 index 0000000..e97d7fa --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx4-OMP4-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of MPI PEs per node (2x NUMA -- up to 16)) +# -S number of MPI PEs per NUMA (up to 8 -- this is option replaces -N +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx8-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx8-OMP4-PE16.run new file mode 100644 index 0000000..ebc2172 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx8-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI128-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI128-OMP4-PE16.run 
new file mode 100644 index 0000000..f9c85bc --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI128-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 128 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI512-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI512-OMP8-PE16.run new file mode 100644 index 0000000..7c791dc --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI512-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/256_2amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_2amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..2595363 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = 
SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 256 256 256 # Number of cells on the coarse grid + +amr.max_level = 2 # Maximum level number allowed +amr.ref_ratio = 4 2 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity 
with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_2amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI-OMP4-PE16.run new file mode 100644 index 0000000..ee3ac73 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI2-OMP4-PE16.run new file mode 100644 index 0000000..d48c57d --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 
PE share an FPU, we may want to reduce this) +aprun -n 256 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx2-OMP4-PE16.run new file mode 100644 index 0000000..980af95 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP4-PE16.run new file mode 100644 index 0000000..dad47d8 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP8-PE16.run new file mode 100644 index 0000000..199b67a --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/512_0amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_0amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..8dbc623 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS 
############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 2 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the 
Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_0amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI1024-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI1024-PE16.run new file mode 100644 index 0000000..4f1d576 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI1024-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N 
wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI2048-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI2048-PE16.run new file mode 100644 index 0000000..9b3051b --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..8976565 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..db26b9d --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-PE16.run new file mode 100644 index 0000000..64c1f08 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per 
node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI512-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI512-PE16.run new file mode 100644 index 0000000..2d2066e --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI512-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/512_1amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_1amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..9914ff5 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR 
+############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 1 # Maximum level number allowed +amr.ref_ratio = 4 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O 
+############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_1amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI1024-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI1024-PE16.run new file mode 100644 index 0000000..4f1d576 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI1024-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP2-PE16.run new file mode 100644 index 0000000..38917c6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP8-PE16.run new file mode 100644 index 0000000..21f7dc4 --- /dev/null +++ 
b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP8-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-PE16.run new file mode 100644 index 0000000..9b3051b --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..8976565 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..db26b9d --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-PE16.run new file mode 100644 index 0000000..64c1f08 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q 
batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI512-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI512-PE16.run new file mode 100644 index 0000000..2d2066e --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI512-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/castro-wdmerger-scaling.txt b/scaling/castro/wdmerger/titan_20171011/castro-wdmerger-scaling.txt new file mode 100644 index 0000000..02e089b --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/castro-wdmerger-scaling.txt @@ -0,0 +1,62 @@ +# this was run on 2017-10-07 using the wdmerger problem in 3-d on +# titan with the PGI 17.7 compilers. 
+ +# problem size: 256^3, max_grid_size=32 +# single level: 512 boxes at max + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 64 1 64 4 32 256 0 37.0713 0.189262 + 128 1 128 8 32 256 0 18.8354 0.0999653 + 256 1 256 16 32 256 0 9.64843 0.0564946 + 512 1 512 32 32 256 0 4.96933 0.0271702 + 512 2 1024 64 32 256 0 2.69029 0.0417153 + 512 4 2048 128 32 256 0 1.59607 0.0281456 +# 1024 4 4096 256 32 256 0 1.41143 0.0101482 + + +# one AMR level (4x) + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 128 4 512 32 32 256 1 53.6792 2.40441 + 256 4 1024 64 32 256 1 30.104 0.950528 + 512 4 2048 128 32 256 1 19.718 0.659894 +# 512 8 4096 256 32 256 1 15.1611 0.554613 + 1024 4 4096 256 32 256 1 14.0598 0.769543 + 2048 4 8192 512 32 256 1 13.5301 0.882619 + + +# two AMR levels (4x, 2x) + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 256 4 1024 64 32 256 2 104.924 2.2645 + 512 4 2048 128 32 256 2 68.8377 1.26659 + 1024 4 4096 256 32 256 2 46.0887 1.44635 + 2048 4 8192 512 32 256 2 44.116 1.78023 + 2048 8 16384 1024 32 256 2 43.7937 2.36656 + + + +# problem size: 512^3, max_grid_size = 32 +# single level: 4096 boxes at max + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 512 1 512 32 32 512 0 39.0762 0.270942 + 1024 1 1024 64 32 512 0 20.6763 0.153724 + 2048 1 2048 128 32 512 0 11.307 0.0863866 + 4096 1 4096 256 32 512 0 6.66855 0.086108 + 4096 2 8192 512 32 512 0 4.24551 0.0617616 + 4096 4 16384 1024 32 512 0 3.08108 0.0691301 + + +# one AMR level (4x) + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev +# 512 1 512 32 32 512 1 OOM +# 1024 1 1024 64 32 512 1 OOM + 2048 1 2048 128 32 512 1 105.317 4.19868 + 2048 2 4096 256 32 512 1 66.8652 3.54906 + 2048 4 8192 512 32 512 1 48.5837 3.56045 + 2048 8 16384 1024 32 512 1 36.5839 2.4316 +# 4096 1 4096 256 32 512 1 71.3143 3.19988 +# 4096 2 8192 512 32 512 1 49.3618 3.17413 +# 4096 4 16384 1024 32 512 1 37.0667 2.54384
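
The avg-time-per-step and stddev columns above come from the avg_time.sh helper included earlier in this patch, which averages the last 10 "Coarse TimeStep" lines of a run's output and gets the standard deviation from the sum-of-squares expression. A minimal sketch of a slightly more flexible variant (hypothetical file name avg_time_n.sh, not part of this patch), using the same grep/awk pipeline but taking the number of trailing steps as an optional second argument:

#!/bin/sh
# Sketch only (assumed name avg_time_n.sh): same parsing as avg_time.sh above,
# but the number of trailing coarse steps to average is an optional argument.
# usage: ./avg_time_n.sh castro_output.log [nsteps]
nsteps=${2:-10}
grep -i "Coarse TimeStep" "$1" | tail -n "$nsteps" | \
    awk '{sum += $6; sumsq += $6^2; count += 1} END {if (count > 0) print sum/count " " sqrt(sumsq/count - (sum/count)^2)}'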
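
The timings can also be turned into strong-scaling efficiencies directly. The sketch below (hypothetical file name efficiency.sh, not part of this patch) compares the 2048-core run against the 64-core baseline from the 256^3 single-level table, using efficiency = (t_base / t) / (cores / cores_base):

#!/bin/sh
# Sketch only (assumed name efficiency.sh): strong-scaling speedup and parallel
# efficiency for the 256^3, max_level=0 runs, numbers taken from the table above.
awk 'BEGIN {
    base_cores = 64;   base_time = 37.0713   # 64-core baseline
    cores      = 2048; time      = 1.59607   # 2048-core run
    speedup = base_time / time               # ~23.2
    ideal   = cores / base_cores             # 32
    printf "speedup = %.2f  ideal = %.0f  efficiency = %.1f%%\n", speedup, ideal, 100 * speedup / ideal
}'

For this pair the efficiency works out to roughly 73%, consistent with the flattening visible toward the bottom of that table.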