diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph new file mode 100644 index 0000000..7ea84fd --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 32 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file +#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph_1level b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph_1level new file mode 100644 index 0000000..c50947d --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/inputs.3d.sph_1level @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 32 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file 
+#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/probin.3d.sph b/scaling/castro/sedov/summit_201905/CPU_runs/probin.3d.sph new file mode 100644 index 0000000..28bf051 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/probin.3d.sph @@ -0,0 +1,29 @@ +&fortin + + r_init = 0.01 + p_ambient = 1.d-5 + exp_energy = 1.0 + dens_ambient = 1.0 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + + eos_assume_neutral = T + +/ diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_16node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_16node_nogpu.sh new file mode 100644 index 0000000..e2c605a --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_16node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 10 +#BSUB -nnodes 16 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph + +n_mpi=96 # 16 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_32node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_32node_nogpu.sh new file mode 100644 index 0000000..64fa3c8 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_32node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 10 +#BSUB -nnodes 32 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=192 # 32 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_4node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_4node_nogpu.sh new file mode 100644 index 0000000..3e4a986 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_4node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 25 +#BSUB -nnodes 4 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=24 # 4 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_64node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_64node_nogpu.sh new file mode 100644 index 0000000..29ed037 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_64node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 25 +#BSUB -nnodes 64 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=384 # 64 
nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/CPU_runs/summit_8node_nogpu.sh b/scaling/castro/sedov/summit_201905/CPU_runs/summit_8node_nogpu.sh new file mode 100644 index 0000000..1c0eb15 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/CPU_runs/summit_8node_nogpu.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 25 +#BSUB -nnodes 8 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_cpu +#BSUB -o Sedov_cpu.%J +#BSUB -e Sedov_cpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=48 # 8 nodes * 6 mpi per node +n_omp=7 +n_gpu=0 +n_cores=7 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.OMP.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph new file mode 100644 index 0000000..292a19f --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 8 # block factor in grid generation +amr.max_grid_size = 64 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file +#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph_1level b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph_1level new file mode 100644 index 0000000..459efe2 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/inputs.3d.sph_1level @@ -0,0 +1,57 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 0.01 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 # 0 => cart +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1 1 1 +amr.n_cell = 256 256 256 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC 
FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp +#amr.grid_log = grdlog # name of grid logging file + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 8 # block factor in grid generation +amr.max_grid_size = 64 + + +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# CHECKPOINT FILES +#amr.check_file = sedov_3d_chk # root name of checkpoint file +#amr.check_int = 200 # number of timesteps between checkpoints + +# PLOTFILES +#amr.plot_file = sedov_3d_plt +#amr.plot_int = 50 +amr.derive_plot_vars=ALL + +# PROBIN FILENAME +amr.probin_file = probin.3d.sph diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/probin.3d.sph b/scaling/castro/sedov/summit_201905/GPU_runs/probin.3d.sph new file mode 100644 index 0000000..28bf051 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/probin.3d.sph @@ -0,0 +1,29 @@ +&fortin + + r_init = 0.01 + p_ambient = 1.d-5 + exp_energy = 1.0 + dens_ambient = 1.0 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + + eos_assume_neutral = T + +/ diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_16nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_16nodes.sh new file mode 100644 index 0000000..70148b8 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_16nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 30 +#BSUB -nnodes 16 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=96 # 16 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_32nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_32nodes.sh new file mode 100644 index 0000000..1f3b7c8 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_32nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 15 +#BSUB -nnodes 32 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=192 # 32 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_4nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_4nodes.sh new file mode 100644 index 0000000..bd1200d --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_4nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB
-W 15 +#BSUB -nnodes 4 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=24 # 4 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_64nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_64nodes.sh new file mode 100644 index 0000000..e3e4e74 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_64nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 15 +#BSUB -nnodes 64 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=384 # 64 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/GPU_runs/summit_8nodes.sh b/scaling/castro/sedov/summit_201905/GPU_runs/summit_8nodes.sh new file mode 100644 index 0000000..76f9be3 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/GPU_runs/summit_8nodes.sh @@ -0,0 +1,24 @@ +#!/bin/bash +#BSUB -P ast106 +#BSUB -W 30 +#BSUB -nnodes 8 +#BSUB -alloc_flags smt1 +#BSUB -J Sedov_gpu +#BSUB -o Sedov_gpu.%J +#BSUB -e Sedov_gpu.%J + +cd $LS_SUBCWD + +inputs_file=inputs.3d.sph_1level + +n_mpi=48 # 8 nodes * 6 gpu per node +n_omp=1 +n_gpu=1 +n_cores=1 +n_rs_per_node=6 + +export OMP_NUM_THREADS=$n_omp + +Castro_ex=./Castro3d.pgi.MPI.CUDA.ex + +jsrun -n $n_mpi -r $n_rs_per_node -c $n_cores -a 1 -g $n_gpu $Castro_ex $inputs_file diff --git a/scaling/castro/sedov/summit_201905/plot_scaling.py b/scaling/castro/sedov/summit_201905/plot_scaling.py new file mode 100644 index 0000000..3e3c355 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/plot_scaling.py @@ -0,0 +1,55 @@ +import numpy as np +import matplotlib.pyplot as plt + +data = np.loadtxt("summit_scaling_may19.txt") + +for i in range (0,40,5): + #print(i) + nodes = data[i:i+5,0] + avg_zone = data[i:i+5,2] + + if i in [0,5,20,25]: + if data[i,1]==256 : #blue for 256^3 and orange for 512^3 when using gpu + color = "C0" + else: + color = "C1" + else: + if data[i,1]==256 : #skyblue for 256^3 and red for 512^3 when using MPI+OpenMP + color = "C9" + else: + color = "C3" + + + + if i in [5,15,25,35]: #amr triangle marker, no amr circle + marker ="^" + else: + marker ="o" + + #plt.scatter(nodes, avg_zone) + plt.plot(nodes,avg_zone, marker+color, ls=":") + +plt.xlabel("number of nodes") +plt.ylabel(r"Avg # of zones advanced/ $\mu$s") + +#legends +legs = [] +legnames = [] +legs.append(plt.Line2D((0,1),(0,0), color = "C0")) +legnames.append(r"$256^3$ gpu") +legs.append(plt.Line2D((0,1),(0,0), color = "C1")) +legnames.append(r"$512^3$ gpu") +legs.append(plt.Line2D((0,1),(0,0), color = "C9")) +legnames.append(r"$256^3$ MPI+OMP") +legs.append(plt.Line2D((0,1),(0,0), color = "C3")) +legnames.append(r"$512^3$ MPI+OMP") +legs.append(plt.Line2D((0,1),(0,0), color="k", + marker="o", markeredgecolor="k", markerfacecolor="k", linestyle="none")) +legnames.append("no AMR") +legs.append(plt.Line2D((0,1),(0,0), color="k", + marker="^", markeredgecolor="k", markerfacecolor="k", linestyle="none")) +legnames.append("base + one 4x level") + 
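+# the Line2D objects above are proxy artists for the legend: color encodes the
+# configuration (GPU vs MPI+OMP, 256^3 vs 512^3) and marker encodes AMR vs no AMR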
+plt.legend(legs, legnames, frameon=False, fontsize="8", numpoints=1, loc=0, ncol=3) + +plt.savefig("summit_sedov.png", dpi=150) diff --git a/scaling/castro/sedov/summit_201905/summit_scaling_may19.txt b/scaling/castro/sedov/summit_201905/summit_scaling_may19.txt new file mode 100644 index 0000000..3f00675 --- /dev/null +++ b/scaling/castro/sedov/summit_201905/summit_scaling_may19.txt @@ -0,0 +1,70 @@ + + +#no AMR ---- gpu : 6 per node, amr.max_grid_size=64, blocking_factor=8 +#nodes grid size average # zones perusec + 4 256 61.312 + 8 256 89.165 + 16 256 154.102 + 32 256 230.673 + 64 256 313.496 + + +#1 x4 level ----- gpu +#nodes grid_size average # zones per usec + 4 256 42.297 + 8 256 54.026 + 16 256 70.412 + 32 256 86.652 + 64 256 98.515 + + +#no AMR ---- MPI+OpenMP : 6 MPI + 7 OpenMP per node +#nodes grid_size average # zones per usec + 4 256 3.790 + 8 256 6.826 + 16 256 11.335 + 32 256 21.504 + 64 256 22.354 + +#1 x4 level ----- MPI+OpenMP +#nodes grid_size average # zones per usec + 4 256 2.527 + 8 256 5.027 + 16 256 7.916 + 32 256 15.190 + 64 256 14.253 + +#no AMR ---- gpu : 6 per node, amr.max_grid_size=64, blocking_factor=8 +#nodes grid size average # zones perusec + 4 512 73.328 + 8 512 143.232 + 16 512 251.890 + 32 512 468.750 + 64 512 695.104 + + +#1 x4 level ----- gpu +#nodes grid_size average # zones per usec + 4 512 62.00 + 8 512 117.079 + 16 512 188.958 + 32 512 292.351 + 64 512 387.022 + + +#no AMR ---- MPI+OpenMP : 6 MPI + 7 OpenMP per node +#nodes grid_size average # zones per usec + 4 512 4.077 + 8 512 8.006 + 16 512 14.406 + 32 512 30.299 + 64 512 49.173 + +#1 x4 level ----- MPI+OpenMP +#nodes grid_size average # zones per usec + 4 512 3.307 + 8 512 6.203 + 16 512 12.366 + 32 512 22.265 + 64 512 41.535 + diff --git a/scaling/castro/sedov/titan_20171011/1level/avg_time.sh b/scaling/castro/sedov/titan_20171011/1level/avg_time.sh new file mode 100755 index 0000000..d5486dc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -5 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/sedov/titan_20171011/1level/inputs.starlord b/scaling/castro/sedov/titan_20171011/1level/inputs.starlord new file mode 100644 index 0000000..49f2021 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1024 1024 1024 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum 
level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 16 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level/probin.starlord b/scaling/castro/sedov/titan_20171011/1level/probin.starlord new file mode 100644 index 0000000..1dfabc0 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 3.0d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 1 + +/ + +&tagging + + denerr = 3 + dengrad = 1.e4 + max_denerr_lev = 0 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 1.e4 + max_presserr_lev = 0 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI1024-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI1024-PE16.run new file mode 100644 index 0000000..b25d5d4 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI1024-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:45:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP16-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP16-PE16.run new file mode 100644 index 0000000..962fe25 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP16-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=16 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 1 -d 16 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP2-PE16.run new file mode 100644 index 0000000..d3b396e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP4-PE16.run new file mode 100644 index 0000000..32ac1d1 
--- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP4-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP8-PE16.run new file mode 100644 index 0000000..44c8361 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-OMP8-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-PE16.run new file mode 100644 index 0000000..27284b2 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..4a1a33b --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..167002d --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + 
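+# layout: 4096 MPI ranks x 4 OpenMP threads = 16384 cores = 1024 nodes at 16 cores
+# per node; -S 2 below places 2 ranks on each of the node's two 8-core NUMA dies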
+export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP8-PE16.run new file mode 100644 index 0000000..30e628a --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-PE16.run b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-PE16.run new file mode 100644 index 0000000..edca485 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/avg_time.sh b/scaling/castro/sedov/titan_20171011/1level_4x/avg_time.sh new file mode 100755 index 0000000..d5486dc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -5 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/inputs.starlord b/scaling/castro/sedov/titan_20171011/1level_4x/inputs.starlord new file mode 100644 index 0000000..3e26dba --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1024 1024 1024 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive 
steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 1 # maximum level number allowed +amr.ref_ratio = 4 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 16 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/probin.starlord b/scaling/castro/sedov/titan_20171011/1level_4x/probin.starlord new file mode 100644 index 0000000..1dfabc0 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 3.0d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 1 + +/ + +&tagging + + denerr = 3 + dengrad = 1.e4 + max_denerr_lev = 0 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 1.e4 + max_presserr_lev = 0 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP2-PE16.run new file mode 100644 index 0000000..d3b396e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP4-PE16.run new file mode 100644 index 0000000..32ac1d1 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP4-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP8-PE16.run new file mode 100644 index 0000000..44c8361 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-OMP8-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 
2048 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-PE16.run new file mode 100644 index 0000000..27284b2 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..4a1a33b --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 4 -d 2 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..167002d --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP8-PE16.run new file mode 100644 index 0000000..30e628a --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 1 -d 8 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-PE16.run b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-PE16.run new file mode 
100644 index 0000000..6ee9a43 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/1level_4x/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-2lev-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -S 8 -d 1 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/inputs.starlord b/scaling/castro/sedov/titan_20171011/64_1024/inputs.starlord new file mode 100644 index 0000000..c7d7f30 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1024 1024 1024 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1024/probin.starlord b/scaling/castro/sedov/titan_20171011/64_1024/probin.starlord new file mode 100644 index 0000000..bfaca7e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 1.25d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI-PE16.run new file mode 100644 index 0000000..5038a7c --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit 
(since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI2-PE16.run new file mode 100644 index 0000000..5139169 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP2-PE16.run new file mode 100644 index 0000000..42881ec --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP4-PE16.run new file mode 100644 index 0000000..1318217 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP8-PE16.run new file mode 100644 index 0000000..2acaaeb --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1024/titan-1024-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-4096-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/inputs.starlord b/scaling/castro/sedov/titan_20171011/64_1536/inputs.starlord new file mode 100644 index 
0000000..29cf89a --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 1.0e9 1.0e9 +amr.n_cell = 1536 1536 1536 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1536/probin.starlord b/scaling/castro/sedov/titan_20171011/64_1536/probin.starlord new file mode 100644 index 0000000..bfaca7e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 1.25d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI-PE16.run new file mode 100644 index 0000000..4175bea --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1024-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=864 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI2-PE16.run new file mode 100644 index 0000000..019a694 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=432 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit 
(since 2 PE share an FPU, we may want to reduce this) +aprun -n 6912 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP2-PE16.run new file mode 100644 index 0000000..16e4ed7 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=1728 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP4-PE16.run new file mode 100644 index 0000000..de39ceb --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=3456 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP8-PE16.run new file mode 100644 index 0000000..3531d2d --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_1536/titan-1536-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-1536-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=6912 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 13824 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_512/avg_time.sh b/scaling/castro/sedov/titan_20171011/64_512/avg_time.sh new file mode 100755 index 0000000..d5486dc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -5 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/sedov/titan_20171011/64_512/inputs.starlord b/scaling/castro/sedov/titan_20171011/64_512/inputs.starlord new file mode 100644 index 0000000..c3beffc --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/inputs.starlord @@ -0,0 +1,49 @@ +# ------------------ INPUTS TO MAIN PROGRAM ------------------- +max_step = 10 +stop_time = 1.0e-2 + +# PROBLEM SIZE & GEOMETRY +geometry.is_periodic = 0 0 0 +geometry.coord_sys = 0 +geometry.prob_lo = 0 0 0 +geometry.prob_hi = 1.0e9 
1.0e9 1.0e9 +amr.n_cell = 512 512 512 + +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +# >>>>>>>>>>>>> BC FLAGS <<<<<<<<<<<<<<<< +castro.lo_bc = 2 2 2 +castro.hi_bc = 2 2 2 + +# WHICH PHYSICS +castro.do_hydro = 1 +castro.do_react = 0 +castro.ppm_type = 1 +castro.do_ctu = 0 +castro.allow_negative_energy = 0 + +# TIME STEP CONTROL +castro.cfl = 0.5 # cfl number for hyperbolic system +castro.init_shrink = 0.01 # scale back initial timestep +castro.change_max = 1.1 # maximum increase in dt over successive steps + +# DIAGNOSTICS & VERBOSITY +castro.sum_interval = 1 # timesteps between computing mass +castro.v = 1 # verbosity in Castro.cpp +amr.v = 1 # verbosity in Amr.cpp + +# REFINEMENT / REGRIDDING +amr.max_level = 0 # maximum level number allowed +amr.ref_ratio = 2 2 2 2 # refinement ratio +amr.regrid_int = 2 # how often to regrid +amr.blocking_factor = 4 # block factor in grid generation +amr.max_grid_size = 64 + +# CHECKPOINT FILES +amr.checkpoint_files_output = 0 +amr.plot_files_output = 0 + +# PROBIN FILENAME +amr.probin_file = probin.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_512/probin.starlord b/scaling/castro/sedov/titan_20171011/64_512/probin.starlord new file mode 100644 index 0000000..bfaca7e --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/probin.starlord @@ -0,0 +1,27 @@ +&fortin + + r_init = 1.25d8 + p_ambient = 1.d21 + exp_energy = 1.d52 + dens_ambient = 1.d4 + nsub = 10 + +/ + +&tagging + + denerr = 3 + dengrad = 0.01 + max_denerr_lev = 3 + max_dengrad_lev = 3 + + presserr = 3 + pressgrad = 0.01 + max_presserr_lev = 3 + max_pressgrad_lev = 3 + +/ + +&extern + +/ diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI-PE16.run new file mode 100644 index 0000000..fda4d92 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI2-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI2-PE16.run new file mode 100644 index 0000000..b23b3a7 --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=16 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 256 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + + diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP2-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP2-PE16.run new file mode 100644 index 0000000..07dadbf --- /dev/null +++ 
b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP2-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP4-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP4-PE16.run new file mode 100644 index 0000000..2e78e9b --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord diff --git a/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP8-PE16.run b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP8-PE16.run new file mode 100644 index 0000000..d1b2aee --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/64_512/titan-512-OMP8-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N sedov-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs.starlord + diff --git a/scaling/castro/sedov/titan_20171011/castro-sedov-scaling.txt b/scaling/castro/sedov/titan_20171011/castro-sedov-scaling.txt new file mode 100644 index 0000000..2546b0c --- /dev/null +++ b/scaling/castro/sedov/titan_20171011/castro-sedov-scaling.txt @@ -0,0 +1,56 @@ +# this was run on 2017-10-07 using the Sedov in 3-d on +# titan with helmeos and inputs.starlord with the PGI 17.7 compilers. 
+# +# run for 10 steps and then average time for last 5 + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 256 1 256 16 64 512 0 27.3737 0.037914 + 512 1 512 32 64 512 0 13.797 0.0480446 + 512 2 1024 64 64 512 0 7.02318 0.0126529 + 512 4 2048 128 64 512 0 3.61333 0.00935958 + 512 8 4096 256 64 512 0 2.01249 0.0299526 + + 2048 1 2048 128 64 1024 0 27.4302 0.039496 + 4096 1 4096 256 64 1024 0 14.0757 0.276087 + 4096 2 8192 512 64 1024 0 7.11302 0.00464732 + 4096 4 16384 1024 64 1024 0 3.67513 0.00847975 + 4096 8 32768 2048 64 1024 0 2.05405 0.0142505 + + 6912 1 6912 432 64 1536 0 27.5459 0.100138 +13824 1 13824 864 64 1536 0 14.041 0.0896588 +13824 2 27648 1728 64 1536 0 7.15245 0.0197148 +13824 4 55296 3456 64 1536 0 3.7265 0.00461254 +13824 8 110592 6912 64 1536 0 2.1079 0.0216659 + + + +# 1 level, blocking_factor = 8 +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev +# 2048 1 2048 128 64 1024 1 62.9596 7.87975 +# 4096 1 4096 256 64 1024 1 44.4701 10.4288 +# 4096 2 8192 512 64 1024 1 23.5499 5.92599 +# 4096 4 16384 1024 64 1024 1 13.4561 3.67212 +# 4096 8 32768 2048 64 1024 1 8.52401 2.89531 + + +# 1 level, blocking_factor = 16 (can't compare these #s to above, since more zones with this blocking factor) +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 2048 1 2048 128 64 1024 1 78.0444 7.11191 + 2048 2 4096 256 64 1024 1 41.9756 3.8821 + 2048 4 8192 512 64 1024 1 24.5403 2.40322 + 2048 8 16384 1024 64 1024 1 14.2543 1.57262 + 2048 16 32768 2048 64 1024 1 8.70782 1.3139 +# 4096 1 4096 256 64 1024 1 51.487 13.5009 +# 4096 2 8192 512 64 1024 1 26.6659 7.14374 +# 4096 4 16384 1024 64 1024 1 14.9838 4.11424 +# 4096 8 32768 2048 64 1024 1 8.88012 2.59245 + + +# 1 4x level, blocking factor = 16 +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev +# 2048 1 2048 128 64 1024 1 OOM +# 4096 1 4096 256 64 1024 1 OOM + 4096 2 8192 512 64 1024 14 135.137 6.23752 + 4096 4 16384 1024 64 1024 14 79.4846 4.28278 + 4096 8 32768 2048 64 1024 14 47.4894 3.41423 + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_128.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_128.knl.MPI.OMP.slurm new file mode 100644 index 0000000..68cca56 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_128.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 128 +#SBATCH -C knl,quad,cache +#SBATCH -p debug +#SBATCH -t 00:20:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 2048 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_16.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_16.knl.MPI.OMP.slurm new file mode 100644 index 0000000..855e9fc --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_16.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 16 +#SBATCH -C knl,quad,cache +#SBATCH -p debug +#SBATCH -t 00:30:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 256 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_32.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_32.knl.MPI.OMP.slurm new file mode 100644 index 
0000000..b268f78 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_32.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 32 +#SBATCH -C knl,quad,cache +#SBATCH -p regular +#SBATCH -t 00:45:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 512 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_64.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_64.knl.MPI.OMP.slurm new file mode 100644 index 0000000..40133c7 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_64.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 64 +#SBATCH -C knl,quad,cache +#SBATCH -p debug +#SBATCH -t 00:20:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 1024 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/cori-nodes_96.knl.MPI.OMP.slurm b/scaling/castro/wdmerger/cori_201710/cori-nodes_96.knl.MPI.OMP.slurm new file mode 100644 index 0000000..b87990d --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/cori-nodes_96.knl.MPI.OMP.slurm @@ -0,0 +1,22 @@ +#!/bin/bash +#SBATCH -N 96 +#SBATCH -C knl,quad,cache +#SBATCH -p regular +#SBATCH -t 01:30:00 +#SBATCH -J test3 +#SBATCH -A m1938 + + +export OMP_PROC_BIND=close +export OMP_PLACES=threads + +export OMP_NUM_THREADS=16 + +cd $SLURM_SUBMIT_DIR + + + +srun -n 1536 -c 16 --cpu_bind=cores ./Castro3d.gnu.mic-knl.DEBUG.MPI.OMP.ex inputs_test_wdmerger_3D + + + diff --git a/scaling/castro/wdmerger/cori_201710/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/cori_201710/inputs_test_wdmerger_3D new file mode 100644 index 0000000..5355835 --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/inputs_test_wdmerger_3D @@ -0,0 +1,119 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, 
y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 4 2 + +amr.max_grid_size = 32 48 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity 
after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 0 # Control verbosity in Castro.cpp + +castro.print_fortran_warnings = 0 + +gravity.v = 0 # Control verbosity in Gravity.cpp +mg.v = 0 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables + + diff --git a/scaling/castro/wdmerger/cori_201710/probin_test_wdmerger_3D b/scaling/castro/wdmerger/cori_201710/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/cori_201710/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/summitdev_201806/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/summitdev_201806/inputs_test_wdmerger_3D new file mode 100644 index 0000000..9a6b8ea --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/inputs_test_wdmerger_3D @@ -0,0 +1,117 @@ +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 5 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, 
y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 4 2 + +amr.max_grid_size = 32 48 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity 
after refluxing +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0           # Whether or not to output plotfiles +amr.checkpoint_files_output = 0     # Whether or not to output checkpoints + +amr.check_file = chk                # Root name of checkpoint file +amr.check_int = 10                  # Number of timesteps between checkpoints +amr.plot_file = plt                 # Root name of plot file +amr.plot_int = 10                   # Number of timesteps between plotfiles + +amr.v = 1                           # Control verbosity in Amr.cpp +castro.v = 0                        # Control verbosity in Castro.cpp + +castro.print_fortran_warnings = 0 + +gravity.v = 0                       # Control verbosity in Gravity.cpp +mg.v = 0                            # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE         # Don't compute any derived variables for the plotfiles + + diff --git a/scaling/castro/wdmerger/summitdev_201806/probin_test_wdmerger_3D b/scaling/castro/wdmerger/summitdev_201806/probin_test_wdmerger_3D new file mode 100644 index 0000000..4dbe0ca --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ + diff --git a/scaling/castro/wdmerger/summitdev_201806/scaling.txt b/scaling/castro/wdmerger/summitdev_201806/scaling.txt new file mode 100644 index 0000000..0b618b5 --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/scaling.txt @@ -0,0 +1,40 @@ + +obtained the average time using + +grep -i "Coarse TimeStep" nvme_test.o320568 | awk '{sum += $6; count +=1} END {print sum/count}' + +For 10 steps ********************** +amr.n_cell = 256, amr.max_level = 0 + +#nodes #cores #MPI #time + +4  80  80  11.8403 + +8  160  160  6.79126 + +16  320  320  9.65454 + +amr.n_cell = 256, amr.max_level = 1 + +4  86.9711 + +8  50.8912 + +16 + +amr.n_cell = 512, amr.max_level = 0 + +4  86.5123 + +8  43.7392 + +16 + +For 5 steps ************************** +amr.n_cell = 512, amr.max_level = 1 + +4 + +8  278.195 + +16 diff --git a/scaling/castro/wdmerger/summitdev_201806/summitd-160MPI.run b/scaling/castro/wdmerger/summitdev_201806/summitd-160MPI.run new file mode 100644 index 0000000..bd70ba7 --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/summitd-160MPI.run @@ -0,0 +1,12 @@ +#!/bin/bash +#BSUB -P AST106SUMDEV +#BSUB -J wdmerger-160-scale +#BSUB -o nvme_test.o%J +#BSUB -W 00:50 +#BSUB -nnodes 8 + +cd /lustre/atlas/scratch/$USER/ast106/wdmerger/512/1amr_8nodes_1thread + +export OMP_NUM_THREADS=1 + +jsrun -n 160 -a 1 -c 1 ./Castro3d.gnu.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/summitdev_201806/summitd-320MPI.run b/scaling/castro/wdmerger/summitdev_201806/summitd-320MPI.run new file mode 100644 index 0000000..f495997 --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/summitd-320MPI.run @@ -0,0 +1,12 @@ +#!/bin/bash +#BSUB -P AST106SUMDEV +#BSUB -J wdmerger-320-scale +#BSUB -o nvme_test.o%J +#BSUB -W 00:30 +#BSUB -nnodes 16 + +cd
/lustre/atlas/scratch/$USER/ast106/wdmerger/512/0amr_16nodes_1thread + +export OMP_NUM_THREADS=1 + +jsrun -n 320 -a 1 -c 1 ./Castro3d.gnu.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/summitdev_201806/summitd-80MPI.run b/scaling/castro/wdmerger/summitdev_201806/summitd-80MPI.run new file mode 100644 index 0000000..cacf2da --- /dev/null +++ b/scaling/castro/wdmerger/summitdev_201806/summitd-80MPI.run @@ -0,0 +1,12 @@ +#!/bin/bash +#BSUB -P AST106SUMDEV +#BSUB -J wdmerger-80-scale +#BSUB -o nvme_test.o%J +#BSUB -W 02:25 +#BSUB -nnodes 4 + +cd /lustre/atlas/scratch/$USER/ast106/wdmerger/512/1amr_4nodes_1thread + +export OMP_NUM_THREADS=1 + +jsrun -n 80 -a 1 -c 1 ./Castro3d.gnu.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_041516/README b/scaling/castro/wdmerger/titan_041516/README new file mode 100644 index 0000000..f154f13 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/README @@ -0,0 +1,10 @@ +Scaling data for the wdmerger problem on Titan on April 15, 2016. + +I ran tests with one, two, and three AMR levels (the higher levels refine +around the stars). For all three I ran in the 4 MPI ranks + 4 OMP threads +per node configuration, plus a few cases of 2 MPI + 8 OMP for the three level +test. All data is for 10 timesteps per run. + +For the highest number of processors on the two-level run I also used +the space-filling curve distribution mapping rather than my default +knapsack mapping, and this was about 15% faster. diff --git a/scaling/castro/wdmerger/titan_041516/one_level_omp4.txt b/scaling/castro/wdmerger/titan_041516/one_level_omp4.txt new file mode 100644 index 0000000..1c79d5e --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/one_level_omp4.txt @@ -0,0 +1,5 @@ + PROC AVG MIN MAX + 64 28.0 27.7 28.7 + 128 14.3 14.1 14.9 + 256 7.3 7.1 7.4 + diff --git a/scaling/castro/wdmerger/titan_041516/three_level_omp4.txt b/scaling/castro/wdmerger/titan_041516/three_level_omp4.txt new file mode 100644 index 0000000..cd53fb5 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/three_level_omp4.txt @@ -0,0 +1,6 @@ + PROC AVG MIN MAX + 2048 233.2 223.1 236.5 + 4096 125.8 120.4 127.8 + 8192 84.0 82.2 85.4 + 16384 86.4 82.9 90.3 + diff --git a/scaling/castro/wdmerger/titan_041516/three_level_omp8.txt b/scaling/castro/wdmerger/titan_041516/three_level_omp8.txt new file mode 100644 index 0000000..03a40c1 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/three_level_omp8.txt @@ -0,0 +1,5 @@ + PROC AVG MIN MAX + 2048 218.7 214.0 221.3 + 4096 141.0 133.5 144.2 + 8192 82.3 81.0 84.7 + diff --git a/scaling/castro/wdmerger/titan_041516/two_level_omp4.txt b/scaling/castro/wdmerger/titan_041516/two_level_omp4.txt new file mode 100644 index 0000000..6f0b704 --- /dev/null +++ b/scaling/castro/wdmerger/titan_041516/two_level_omp4.txt @@ -0,0 +1,6 @@ + PROC AVG MIN MAX + 256 50.5 49.3 52.0 + 512 27.4 26.8 28.2 + 1024 16.1 15.6 16.8 + 2048 11.7 11.3 12.7 + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/256_0amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/inputs_test_wdmerger_3D 
b/scaling/castro/wdmerger/titan_20171011/256_0amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..9c437aa --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 256 256 256 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 2 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to 
evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_0amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git 
a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP2-PE16.run new file mode 100644 index 0000000..454edcb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP4-PE16.run new file mode 100644 index 0000000..e9094d0 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-PE16.run new file mode 100644 index 0000000..b5b66df --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI128-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI128-PE16.run new file mode 100644 index 0000000..56ac90f --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI128-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=8 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 128 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI2-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI2-PE16.run new file mode 100644 index 0000000..2a02574 --- /dev/null +++ 
b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N det-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=16 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 256 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI64-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI64-PE16.run new file mode 100644 index 0000000..fd5308e --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPI64-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=4 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 64 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx2-OMP4-PE16.run new file mode 100644 index 0000000..a1affe7 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP4-PE16.run new file mode 100644 index 0000000..dad47d8 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP8-PE16.run new file mode 100644 index 0000000..199b67a --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx4-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l 
walltime=1:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx8-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx8-OMP8-PE16.run new file mode 100644 index 0000000..f0da840 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_0amr/titan-512-MPIx8-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:00:00,nodes=2048 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/256_1amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_1amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..4db2567 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # 
Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 256 256 256 # Number of cells on the coarse grid + +amr.max_level = 1 # Maximum level number allowed +amr.ref_ratio = 4 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off 
sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_1amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-OMP4-PE16.run new file mode 100644 index 0000000..e9094d0 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-PE16.run new file mode 100644 index 0000000..b5b66df --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI2-OMP4-PE16.run 
b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI2-OMP4-PE16.run new file mode 100644 index 0000000..c92f7cc --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPI2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 256 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx2-OMP4-PE16.run new file mode 100644 index 0000000..980af95 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx4-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx4-OMP4-PE16.run new file mode 100644 index 0000000..e97d7fa --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx4-OMP4-PE16.run @@ -0,0 +1,18 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of MPI PEs per node (2x NUMA -- up to 16)) +# -S number of MPI PEs per NUMA (up to 8 -- this is option replaces -N +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -S 2 -d 4 -j 2 -ss ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx8-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx8-OMP4-PE16.run new file mode 100644 index 0000000..ebc2172 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-512-MPIx8-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI128-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI128-OMP4-PE16.run 
new file mode 100644 index 0000000..f9c85bc --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI128-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 128 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI512-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI512-OMP8-PE16.run new file mode 100644 index 0000000..7c791dc --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_1amr/titan-MPI512-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/256_2amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_2amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..2595363 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = 
SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 256 256 256 # Number of cells on the coarse grid + +amr.max_level = 2 # Maximum level number allowed +amr.ref_ratio = 4 2 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity 
with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/256_2amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI-OMP4-PE16.run new file mode 100644 index 0000000..ee3ac73 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI2-OMP4-PE16.run new file mode 100644 index 0000000..d48c57d --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPI2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 
PE share an FPU, we may want to reduce this) +aprun -n 256 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx2-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx2-OMP4-PE16.run new file mode 100644 index 0000000..980af95 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx2-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP4-PE16.run new file mode 100644 index 0000000..dad47d8 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=512 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP8-PE16.run new file mode 100644 index 0000000..199b67a --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/256_2amr/titan-512-MPIx4-OMP8-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-256-scale +#PBS -j oe +#PBS -l walltime=1:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/512_0amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_0amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..8dbc623 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS 
############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR +############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 0 # Maximum level number allowed +amr.ref_ratio = 2 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the 
Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O +############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_0amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI1024-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI1024-PE16.run new file mode 100644 index 0000000..4f1d576 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI1024-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N 
wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI2048-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI2048-PE16.run new file mode 100644 index 0000000..9b3051b --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..8976565 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..db26b9d --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-PE16.run new file mode 100644 index 0000000..64c1f08 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per 
node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI512-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI512-PE16.run new file mode 100644 index 0000000..2d2066e --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_0amr/titan-MPI512-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/512_1amr/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/inputs_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_1amr/inputs_test_wdmerger_3D new file mode 100644 index 0000000..9914ff5 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/inputs_test_wdmerger_3D @@ -0,0 +1,114 @@ + +############################## CASTRO INPUTS ############################################### + +############################################################################################ +# Problem setup +############################################################################################ + +amr.probin_file = probin_test_wdmerger_3D # Name of the probin file + +max_step = 10 # Maximum coarse timestep + +geometry.is_periodic = 0 0 0 # Non-periodic boundary conditions + +geometry.coord_sys = 0 # Cartesian coordinate system + +geometry.prob_lo = -5.12e9 -5.12e9 -5.12e9 # Lower boundary limits in physical space +geometry.prob_hi = 5.12e9 5.12e9 5.12e9 # Upper boundary limits in physical space +castro.center = 0.0e0 0.0e0 0.0e0 # System center of mass + +castro.cfl = 0.5 # CFL number for hyperbolic system +castro.init_shrink = 0.1 # Scale back initial timestep by this factor +castro.change_max = 1.1 # Factor by which dt is allowed to change each timestep +castro.hard_cfl_limit = 0 # Whether to abort a simulation if the CFL criterion is locally violated + +############################################################################################ +# Boundary conditions +# 0 = Interior 3 = Symmetry +# 1 = Inflow 4 = SlipWall +# 2 = Outflow 5 = NoSlipWall +############################################################################################ + +castro.lo_bc = 2 2 2 # Boundary conditions on lo x, y, and z edges +castro.hi_bc = 2 2 2 # Boundary conditions on hi x, y, and z edges + +############################################################################################ +# Resolution, gridding and AMR 
+############################################################################################ + +amr.n_cell = 512 512 512 # Number of cells on the coarse grid + +amr.max_level = 1 # Maximum level number allowed +amr.ref_ratio = 4 + +amr.max_grid_size = 32 # Maximum grid size at each level +amr.blocking_factor = 16 # Grid sizes must be a multiple of this + +amr.grid_eff = 0.9 # What constitutes an efficient grid + +############################################################################################ +# Physics to include +############################################################################################ + +castro.do_hydro = 1 # Whether or not to do hydrodynamics +castro.do_grav = 1 # Whether or not to do gravity +castro.do_react = 0 # Whether or not to do reactions +castro.do_sponge = 1 # Whether or not to apply the sponge +castro.add_ext_src = 1 # Whether or not to apply external source terms +castro.do_rotation = 0 # Whether or not to include the rotation source term +castro.rotational_period = 100.0 # Rotational period of the rotating reference frame +castro.rotational_dPdt = -0.0 # Time rate of change of the rotational period +castro.implicit_rotation_update = 1 # Implicit rotation coupling + +############################################################################################ +# PPM options +############################################################################################ + +castro.ppm_type = 1 # Piecewise parabolic with the original limiters (0 is piecewise linear; 2 is new limiters) +castro.ppm_reference = 1 # Whether we subtract off a reference state in PPM +castro.ppm_reference_eigenvectors = 1 # Whether to evaluate eigenvectors using the reference state +castro.ppm_reference_edge_limit = 1 # Use the wave moving fastest toward the interface instead of the cell centered value as the reference state +castro.ppm_temp_fix = 0 # Use the EOS in calculation of the edge states going into the Riemann solver +castro.grav_source_type = 4 # How to include the gravity source term in the hydro equations +castro.rot_source_type = 4 # How to include the rotation source term in the hydro equations + +############################################################################################ +# Thermodynamics +############################################################################################ + +castro.small_temp = 1.e5 # Minimum allowable temperature (K) +castro.small_dens = 1.e-5 # Minimum allowable density (g / cm**3) + +castro.allow_negative_energy = 0 # Disable the possibility of having a negative energy + +castro.dual_energy_update_E_from_e = 0 # Don't update the total energy using the internal energy +castro.dual_energy_eta1 = 1.0e-3 # Threshold for when to use the internal energy in calculating pressure +castro.dual_energy_eta2 = 1.0e-1 # Threshold for when to use (E - K) in updating internal energy + +############################################################################################ +# Gravity +############################################################################################ + +gravity.gravity_type = PoissonGrav # Full self-gravity with the Poisson equation +gravity.max_multipole_order = 6 # Multipole expansion includes terms up to r**(-max_multipole_order) +gravity.rel_tol = 1.e-10 # Relative tolerance for multigrid solver +gravity.no_sync = 1 # Turn off sync solve for gravity after refluxing + +############################################################################################ +# Diagnostics and I/O 
+############################################################################################ + +amr.plot_files_output = 0 # Whether or not to output plotfiles +amr.checkpoint_files_output = 0 # Whether or not to output checkpoints + +amr.check_file = chk # Root name of checkpoint file +amr.check_int = 10 # Number of timesteps between checkpoints +amr.plot_file = plt # Root name of plot file +amr.plot_int = 10 # Number of timesteps between plotfiles + +amr.v = 1 # Control verbosity in Amr.cpp +castro.v = 1 # Control verbosity in Castro.cpp +gravity.v = 1 # Control verbosity in Gravity.cpp +mg.v = 2 # Control verbosity in the multigrid solver + +amr.derive_plot_vars = NONE # Calculate all variables for plotfiles, including derived variables diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/probin_test_wdmerger_3D b/scaling/castro/wdmerger/titan_20171011/512_1amr/probin_test_wdmerger_3D new file mode 100644 index 0000000..9fa6ddb --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/probin_test_wdmerger_3D @@ -0,0 +1,39 @@ +&fortin + mass_P = 0.90 + mass_S = 0.90 + + problem = 2 + + roche_radius_factor = 1.0d0 + + ambient_density = 1.0d-4 + + stellar_temp = 1.0d7 + ambient_temp = 1.0d7 + + orbital_eccentricity = 0.0d0 + orbital_angle = 0.0d0 + + max_tagging_radius = 0.75d0 + stellar_density_threshold = 1.0d0 + + smallu = 1.0d-12 + + fill_ambient_bc = F +/ + +&tagging + max_temperr_lev = 4 + temperr = 2.d8 +/ + +&sponge + sponge_lower_radius = 3.840d9 + sponge_upper_radius = 4.352d9 + sponge_timescale = 0.01d0 +/ + +&extern + use_eos_coulomb = F + eos_input_is_constant = T +/ diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI1024-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI1024-PE16.run new file mode 100644 index 0000000..4f1d576 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI1024-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=64 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 1024 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP2-PE16.run new file mode 100644 index 0000000..38917c6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP8-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP8-PE16.run new file mode 100644 index 0000000..21f7dc4 --- /dev/null +++ 
b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-OMP8-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:20:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=8 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 2 -d 8 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-PE16.run new file mode 100644 index 0000000..9b3051b --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI2048-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=128 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 2048 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP2-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP2-PE16.run new file mode 100644 index 0000000..8976565 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP2-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=512 +#PBS -q debug +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=2 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 8 -d 2 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP4-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP4-PE16.run new file mode 100644 index 0000000..db26b9d --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-OMP4-PE16.run @@ -0,0 +1,17 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=1024 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=4 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 4 -d 4 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-PE16.run new file mode 100644 index 0000000..64c1f08 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI4096-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=256 +#PBS -q 
batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 4096 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI512-PE16.run b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI512-PE16.run new file mode 100644 index 0000000..2d2066e --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/512_1amr/titan-MPI512-PE16.run @@ -0,0 +1,19 @@ +#!/bin/bash +#PBS -A ast106sbu +#PBS -N wdmerger-512-scale +#PBS -j oe +#PBS -l walltime=0:30:00,nodes=32 +#PBS -q batch +#PBS -l gres=atlas1%atlas2 + +cd $PBS_O_WORKDIR + +export OMP_NUM_THREADS=1 + +# -n number of MPI tasks +# -N number of PEs per node +# -d number of CPUs per PE -- this should be the number of threads +# -j number of CPUs to use per compute unit (since 2 PE share an FPU, we may want to reduce this) +aprun -n 512 -N 16 -d 1 -j 2 ./Castro3d.pgi.interlagos.MPI.OMP.ex inputs_test_wdmerger_3D + + diff --git a/scaling/castro/wdmerger/titan_20171011/avg_time.sh b/scaling/castro/wdmerger/titan_20171011/avg_time.sh new file mode 100755 index 0000000..993afb6 --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/avg_time.sh @@ -0,0 +1,3 @@ +#/bin/sh +# standard deviation is via sum of squares expression +grep -i "Coarse TimeStep" $1 | tail -10 | awk '{sum += $6; sumsq += $6^2; count +=1} END {print sum/count " " sqrt(sumsq/count - (sum/count)^2)}' diff --git a/scaling/castro/wdmerger/titan_20171011/castro-wdmerger-scaling.txt b/scaling/castro/wdmerger/titan_20171011/castro-wdmerger-scaling.txt new file mode 100644 index 0000000..02e089b --- /dev/null +++ b/scaling/castro/wdmerger/titan_20171011/castro-wdmerger-scaling.txt @@ -0,0 +1,62 @@ +# this was run on 2017-10-07 using the wdmerger problem in 3-d on +# titan with the PGI 17.7 compilers. 
+ +# problem size: 256^3, max_grid_size=32 +# single level: 512 boxes at max + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 64 1 64 4 32 256 0 37.0713 0.189262 + 128 1 128 8 32 256 0 18.8354 0.0999653 + 256 1 256 16 32 256 0 9.64843 0.0564946 + 512 1 512 32 32 256 0 4.96933 0.0271702 + 512 2 1024 64 32 256 0 2.69029 0.0417153 + 512 4 2048 128 32 256 0 1.59607 0.0281456 +# 1024 4 4096 256 32 256 0 1.41143 0.0101482 + + +# one AMR level (4x) + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 128 4 512 32 32 256 1 53.6792 2.40441 + 256 4 1024 64 32 256 1 30.104 0.950528 + 512 4 2048 128 32 256 1 19.718 0.659894 +# 512 8 4096 256 32 256 1 15.1611 0.554613 + 1024 4 4096 256 32 256 1 14.0598 0.769543 + 2048 4 8192 512 32 256 1 13.5301 0.882619 + + +# two AMR levels (4x, 2x) + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 256 4 1024 64 32 256 2 104.924 2.2645 + 512 4 2048 128 32 256 2 68.8377 1.26659 + 1024 4 4096 256 32 256 2 46.0887 1.44635 + 2048 4 8192 512 32 256 2 44.116 1.78023 + 2048 8 16384 1024 32 256 2 43.7937 2.36656 + + + +# problem size: 512^3, max_grid_size = 32 +# single level: 4096 boxes at max + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev + 512 1 512 32 32 512 0 39.0762 0.270942 + 1024 1 1024 64 32 512 0 20.6763 0.153724 + 2048 1 2048 128 32 512 0 11.307 0.0863866 + 4096 1 4096 256 32 512 0 6.66855 0.086108 + 4096 2 8192 512 32 512 0 4.24551 0.0617616 + 4096 4 16384 1024 32 512 0 3.08108 0.0691301 + + +# one AMR level (4x) + +# MPI OMP cores nodes max_grid nzones max_level avg-time-per-step stddev +# 512 1 512 32 32 512 1 OOM +# 1024 1 1024 64 32 512 1 OOM + 2048 1 2048 128 32 512 1 105.317 4.19868 + 2048 2 4096 256 32 512 1 66.8652 3.54906 + 2048 4 8192 512 32 512 1 48.5837 3.56045 + 2048 8 16384 1024 32 512 1 36.5839 2.4316 +# 4096 1 4096 256 32 512 1 71.3143 3.19988 +# 4096 2 8192 512 32 512 1 49.3618 3.17413 +# 4096 4 16384 1024 32 512 1 37.0667 2.54384
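
The avg-time-per-step and stddev columns above come from the avg_time.sh helper included earlier in this patch, which averages the last 10 "Coarse TimeStep" lines of a run's output and gets the standard deviation from the sum-of-squares expression. A minimal sketch of a slightly more flexible variant (hypothetical file name avg_time_n.sh, not part of this patch), using the same grep/awk pipeline but taking the number of trailing steps as an optional second argument:

#!/bin/sh
# Sketch only (assumed name avg_time_n.sh): same parsing as avg_time.sh above,
# but the number of trailing coarse steps to average is an optional argument.
# usage: ./avg_time_n.sh castro_output.log [nsteps]
nsteps=${2:-10}
grep -i "Coarse TimeStep" "$1" | tail -n "$nsteps" | \
    awk '{sum += $6; sumsq += $6^2; count += 1} END {if (count > 0) print sum/count " " sqrt(sumsq/count - (sum/count)^2)}'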
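
The timings can also be turned into strong-scaling efficiencies directly. The sketch below (hypothetical file name efficiency.sh, not part of this patch) compares the 2048-core run against the 64-core baseline from the 256^3 single-level table, using efficiency = (t_base / t) / (cores / cores_base):

#!/bin/sh
# Sketch only (assumed name efficiency.sh): strong-scaling speedup and parallel
# efficiency for the 256^3, max_level=0 runs, numbers taken from the table above.
awk 'BEGIN {
    base_cores = 64;   base_time = 37.0713   # 64-core baseline
    cores      = 2048; time      = 1.59607   # 2048-core run
    speedup = base_time / time               # ~23.2
    ideal   = cores / base_cores             # 32
    printf "speedup = %.2f  ideal = %.0f  efficiency = %.1f%%\n", speedup, ideal, 100 * speedup / ideal
}'

For this pair the efficiency works out to roughly 73%, consistent with the flattening visible toward the bottom of that table.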