Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Scripts for evaluating code performance #57

Draft
wants to merge 19 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions scripts/aerosol_performance/CB05CL_AE5_w_simpolSOA/inputs.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
# Input files of the CB05CL_AE5_w_simpolSOA example, located under the TChem install tree.
# TCHEM_INSTALL_PATH must already be set when this file is sourced.
inputs="${TCHEM_INSTALL_PATH}/examples/runs/atmospheric_chemistry/CB05CL_AE5_w_simpolSOA"
chemfile="${inputs}/config_full_gas.yaml"
aerofile="${inputs}/mechanism_aero.yaml"
inputfile_particles="${inputs}/scenario_conditions_particle.yaml"

# Space-separated command-line options that select the files above.
scenario_n_inputs="--chemfile=${chemfile} --aerofile=${aerofile} --inputfile_particles=${inputfile_particles}"
export scenario_n_inputs
23 changes: 23 additions & 0 deletions scripts/aerosol_performance/CB05CL_AE5_w_simpolSOA/runCUDA.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# GPU driver for one mechanism directory: sets the sweep parameters and then
# sources ../runThisGPU.sh, which performs the actual runs.
# Reads experiment_base, experiment_suffix, experiment_name and sflag from the
# environment.
export DEVICE=GPU
# Executable base name = experiment base + solver-specific suffix.
export experiment_exe_root=${experiment_base}${experiment_suffix}
export sacado_flag=$sflag
echo "sacado flag = ${sacado_flag}"
# NOTE(review): loadGPU_WS.sh is not visible here; presumably it defines exec and
# machine_name the way loadMachinePathsTemplate.sh does -- confirm. It is sourced
# after experiment_exe_root is exported, so keep this order.
source ../loadGPU_WS.sh
# Defines scenario_n_inputs (input-file options for this mechanism).
source ./inputs.sh

# Append the sacado flag so each flag gets its own output sub-directory.
export experiment_name="${experiment_name}-${sacado_flag}"
echo "experiment name is ${experiment_name}"
echo "experiment exe is ${exec}"
use_cloned_samples=true
# Set to true to also write the reaction-rate outputs.
verbose=false
# Outputs are saved under this directory.
tchem_outputs=CUDA
mkdir -p ${tchem_outputs}/${experiment_name}
# Batch sizes to sweep; further candidate sizes are kept in the comment below.
nbatch=(72 144 216)
# 288 360 432 504 576 648 720 792 864 936 1008)
# For now use 1 and 1 for team and vector size; these parameters still need tuning.
# vector_size and team_size are read pairwise (same index) by runThisGPU.sh.
vector_size=(1)
team_size=(1)
# Numbers of particles to sweep.
nparticles=(1 10 100 1000)
source ../runThisGPU.sh
24 changes: 24 additions & 0 deletions scripts/aerosol_performance/CB05CL_AE5_w_simpolSOA/runHOSTWS.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
# CPU (host) driver for one mechanism directory: sets the sweep parameters and
# then sources ../runThisCPU.sh, which performs the actual runs.
# Reads experiment_base, experiment_suffix, experiment_name and sflag from the
# environment.
export DEVICE=CPU
# Executable base name = experiment base + solver-specific suffix.
export experiment_exe_root=${experiment_base}${experiment_suffix}
export sacado_flag=$sflag
echo "sacado flag = ${sacado_flag}"
# NOTE(review): loadCPU_WS.sh is not visible here; presumably it defines exec and
# machine_name the way loadMachinePathsTemplate.sh does -- confirm. It is sourced
# after experiment_exe_root is exported, so keep this order.
source ../loadCPU_WS.sh
# Defines scenario_n_inputs (input-file options for this mechanism).
source ./inputs.sh

# Append the sacado flag so each flag gets its own output sub-directory.
export experiment_name="${experiment_name}-${sacado_flag}"
echo "experiment name is ${experiment_name}"
echo "experiment exe is ${exec}"
use_cloned_samples=true
# Set to true to also write the reaction-rate outputs.
verbose=false
# Outputs are saved under this directory.
tchem_outputs=HOST
mkdir -p ${tchem_outputs}/${experiment_name}
# Other batch sizes that can be used (kept for reference):
#20 100 1000 10000 100000 200000 500000 1000000 2000000 3000000 4500000
nbatch=(1)
# 144 216 288)
# 360 432 504 576 648 720 792 864 936 1008)
# OpenMP thread counts to sweep (runThisCPU.sh passes each as OMP_NUM_THREADS).
Nthread=(52 104)
# Numbers of particles to sweep; larger candidates are kept in the comment below.
nparticles=(1 10 50 100)
# 500 1000 5000 10000
source ../runThisCPU.sh
6 changes: 6 additions & 0 deletions scripts/aerosol_performance/loadMachinePathsTemplate.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@

# Template of the machine-specific settings; replace the placeholder paths below.
# experiment_exe_root must be set before this file is sourced.
TCHEM_INSTALL_PATH="/path/tchem/install"
# Prefix used by the run scripts to glob, convert and delete timer files.
machine_name="/name/machine"
# Full path of the example executable to run.
exec="${TCHEM_INSTALL_PATH}/examples/${experiment_exe_root}.x"
# Profiling library placeholder, exported for the child processes.
KOKKOS_TOOLS_LIBS="/path/kokkostools/libs/libkp_kernel_timer.so"
export KOKKOS_TOOLS_LIBS

125 changes: 125 additions & 0 deletions scripts/aerosol_performance/runAll.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
# Measure wall times for the following mechanisms.
# Each entry of dirs is a sub-directory that contains the per-device run scripts.
dirs=(CB05CL_AE5_w_simpolSOA)
# Sacado variants to run; each value is exported as sflag for the run scripts.
sacado_flags=(no_sacado)
# Base name of the example executable; a per-solver suffix is appended to it.
export experiment_base=TChem_AerosolChemistry

# CPU solver cases. cpu_solver_strList and cpu_exe_string are parallel arrays:
# entry i of the first names the case (and selects the <name>_params function),
# entry i of the second is the executable suffix for that case.
# Enable/disable a case by (un)commenting the SAME position in both arrays.
cpu_solver_strList=(
# "trbdf"
# "kokkoskernels"
# "cvode"
"rhss"
)
cpu_exe_string=(
# ""
# "_KokkosKernels"
# "_CVODE"
"_RHSs"
)
# GPU solver cases; same parallel-array convention as the CPU arrays.
# Both arrays are currently empty, so the GPU loop does not run any case.
gpu_solver_strList=(
# "trbdf"
# "kokkoskernels"
#"rhss"
)
gpu_exe_string=(
# ""
# "_KokkosKernels"
#"_RHSs"
)

# Solver options for the "rhss" case: none are needed, so export an empty string.
rhss_params(){
    numerical_params=''
    export numerical_params
}
# Solver options for the "trbdf" case.
# Sets the (global) tuning variables and exports numerical_params, a string of
# command-line options in which every option is followed by a single space.
trbdf_params(){
    tend='1'
    tol_time='1e-3'
    atol_t='1e-12'
    max_dt='1'
    min_dt='1e-3'
    t_iterPerInt=1
    # '%s ' is re-applied to every argument, giving "opt1 opt2 ... optN ".
    printf -v numerical_params '%s ' \
        "--tol-time=${tol_time}" \
        "--time-iterations-per-interval=${t_iterPerInt}" \
        "--dtmin=${min_dt}" \
        "--dtmax=${max_dt}" \
        "--atol-time=${atol_t}" \
        "--tend=${tend}" \
        "--atol-newton=1e-18" \
        "--rtol-newton=1e-8" \
        "--max-newton-iterations=20" \
        "--max-time-iterations=20000"
    export numerical_params
}

# Solver options for the "kokkoskernels" case.
# Sets the (global) tuning variables and exports numerical_params, a string of
# command-line options in which every option is followed by a single space.
kokkoskernels_params(){
    tend='1'
    tol_time='1e-3'
    atol_t='1e-12'
    max_dt='1'
    min_dt='1e-1'
    t_iterPerInt=1
    # '%s ' is re-applied to every argument, giving "opt1 opt2 ... optN ".
    printf -v numerical_params '%s ' \
        "--tol-time=${tol_time}" \
        "--time-iterations-per-interval=${t_iterPerInt}" \
        "--dtmin=${min_dt}" \
        "--dtmax=${max_dt}" \
        "--atol-time=${atol_t}" \
        "--tend=${tend}"
    export numerical_params
}

# Solver options for the "cvode" case.
# Sets the (global) tuning variables and exports numerical_params, a string of
# command-line options in which every option is followed by a single space.
cvode_params(){
    tend='1'
    tol_time='1e-3'
    atol_t='1e-12'
    max_dt='1'
    min_dt='1e-20'
    t_iterPerInt=10
    # '%s ' is re-applied to every argument, giving "opt1 opt2 ... optN ".
    printf -v numerical_params '%s ' \
        "--tol-time=${tol_time}" \
        "--time-iterations-per-interval=${t_iterPerInt}" \
        "--dtmin=${min_dt}" \
        "--dtmax=${max_dt}" \
        "--use-cvode=true" \
        "--atol-time=${atol_t}" \
        "--tend=${tend}"
    export numerical_params
}

# Run every enabled CPU solver case, for each mechanism directory and sacado flag.
# For case i: experiment_name/experiment_suffix are exported from the parallel
# arrays, "<experiment_name>_params" is called to export numerical_params, and
# ${exe} is then executed from inside each directory of dirs.
exe=runHOSTWS.sh
for i in "${!cpu_solver_strList[@]}"; do
    export experiment_name="${cpu_solver_strList[i]}"
    export experiment_suffix="${cpu_exe_string[i]}"
    params_fxn="${experiment_name}_params"
    printf "setting variables for %s case (CPU)\n" "${experiment_name}"
    # Without this check a missing function would only print "command not found"
    # and the case would silently run with the previous solver's numerical_params.
    if ! declare -F "${params_fxn}" > /dev/null; then
        printf "error: function %s is not defined; skipping %s case (CPU)\n" \
            "${params_fxn}" "${experiment_name}" >&2
        continue
    fi
    "${params_fxn}"
    for dir in "${dirs[@]}"; do
        for sacado in "${sacado_flags[@]}"; do
            # These solver cases are not run with the sacado variant.
            if [ "${experiment_name}" == "expEuler" ] && [ "${sacado}" == "sacado" ]; then
                continue
            elif [ "${experiment_name}" == "cvode" ] && [ "${sacado}" == "sacado" ]; then
                continue
            fi
            export sflag="${sacado}"
            echo "cd ${dir};./${exe};cd -"
            # The subshell keeps this script's working directory untouched, and
            # "&&" prevents running ${exe} from the wrong place if cd fails.
            if ! ( cd "${dir}" && "./${exe}" ); then
                printf "warning: %s failed or could not be started in %s\n" \
                    "${exe}" "${dir}" >&2
            fi
        done
    done
    echo "end of outer loop"
done

# Run every enabled GPU solver case, for each mechanism directory and sacado flag.
# For case i: experiment_name/experiment_suffix are exported from the parallel
# arrays, "<experiment_name>_params" is called to export numerical_params, and
# ${exe} is then executed from inside each directory of dirs.
exe=runCUDA.sh
for i in "${!gpu_solver_strList[@]}"; do
    export experiment_name="${gpu_solver_strList[i]}"
    export experiment_suffix="${gpu_exe_string[i]}"
    params_fxn="${experiment_name}_params"
    printf "setting variables for %s case (GPU)\n" "${experiment_name}"
    # Without this check a missing function would only print "command not found"
    # and the case would silently run with the previous solver's numerical_params.
    if ! declare -F "${params_fxn}" > /dev/null; then
        printf "error: function %s is not defined; skipping %s case (GPU)\n" \
            "${params_fxn}" "${experiment_name}" >&2
        continue
    fi
    "${params_fxn}"
    for dir in "${dirs[@]}"; do
        for sacado in "${sacado_flags[@]}"; do
            # This solver case is not run with the sacado variant.
            if [ "${experiment_name}" == "expEuler" ] && [ "${sacado}" == "sacado" ]; then
                continue
            fi
            export sflag="${sacado}"
            echo "cd ${dir};./${exe};cd -"
            # The subshell keeps this script's working directory untouched, and
            # "&&" prevents running ${exe} from the wrong place if cd fails.
            if ! ( cd "${dir}" && "./${exe}" ); then
                printf "warning: %s failed or could not be started in %s\n" \
                    "${exe}" "${dir}" >&2
            fi
        done
    done
    echo "end of outer loop"
done
31 changes: 31 additions & 0 deletions scripts/aerosol_performance/runThisCPU.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# CPU sweep: runs ${exec} once per (number of particles, batch size, thread count).
# Meant to be sourced. Reads these variables from the sourcing shell:
#   nparticles, nbatch, Nthread      arrays of values to sweep
#   tchem_outputs, experiment_name   output directory components
#   exec                             executable to run
#   use_cloned_samples, verbose      flags forwarded to the executable
#   scenario_n_inputs                input-file options
#   numerical_params                 solver options
#   machine_name                     prefix of the timer files to convert and delete

# With an empty machine_name the cleanup below would expand to "rm -rf *".
if [ -z "${machine_name}" ]; then
    echo "error: machine_name is not set; refusing to run (cleanup would be 'rm -rf *')" >&2
    # "return" works when sourced; fall back to "exit" when executed directly.
    return 1 2>/dev/null || exit 1
fi

for number_of_particles in "${nparticles[@]}"; do
    for N in "${nbatch[@]}"; do
        for threads in "${Nthread[@]}"; do
            echo "nbatch = $N"
            echo "Nthread = $threads"
            thread_size=${threads}
            output_wall_times="${tchem_outputs}/${experiment_name}/wall_times_nbatch_${N}_thread_size_${thread_size}_number_of_particles${number_of_particles}.json"
            output="${tchem_outputs}/${experiment_name}/reaction_rates_nbatch_${N}_thread_size_${thread_size}_number_of_particles${number_of_particles}.txt"
            echo "${tchem_outputs}"
            echo "${experiment_name}"
            echo "/reaction_rates_nbatch_${N}_thread_size_${thread_size}.txt"
            echo "output file = " $output
            # The command is assembled as a string and run through eval so that the
            # leading VAR=value assignments are applied as environment for ${exec}.
            run_this="OMP_NUM_THREADS=$thread_size OMP_PLACES=threads OMP_PROC_BIND=close ${exec} \
                --batch_size=$N \
                --use_cloned_samples=$use_cloned_samples \
                --verbose=$verbose \
                --outputfile_times=$output_wall_times \
                --outputfile=$output \
                --number_of_particles=$number_of_particles \
                $scenario_n_inputs \
                $numerical_params "
            echo $run_this
            eval $run_this
            sleep 2
            # Note: we use this with kokkos tools.
            # Convert the files matching ${machine_name}* to JSON, then delete them
            # so the next run starts clean.
            kp_json_writer "${machine_name}"* > "${tchem_outputs}/${experiment_name}/simple_timer_nbatch_${N}_thread_size_${thread_size}_number_of_particles${number_of_particles}.json"
            sleep 2
            rm -rf -- "${machine_name}"*
        done
    done
done
33 changes: 33 additions & 0 deletions scripts/aerosol_performance/runThisGPU.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# GPU sweep: runs ${exec} once per (number of particles, batch size, team/vector pair).
# Meant to be sourced. Reads these variables from the sourcing shell:
#   nparticles, nbatch               arrays of values to sweep
#   vector_size, team_size           parallel arrays; entry i of both is used together
#   tchem_outputs, experiment_name   output directory components
#   exec                             executable to run
#   use_cloned_samples, verbose      flags forwarded to the executable
#   scenario_n_inputs                input-file options
#   numerical_params                 solver options
#   machine_name                     prefix of the timer files to convert and delete

# With an empty machine_name the cleanup below would expand to "rm -rf *".
if [ -z "${machine_name}" ]; then
    echo "error: machine_name is not set; refusing to run (cleanup would be 'rm -rf *')" >&2
    # "return" works when sourced; fall back to "exit" when executed directly.
    return 1 2>/dev/null || exit 1
fi

for number_of_particles in "${nparticles[@]}"; do
    for N in "${nbatch[@]}"; do
        for i in "${!vector_size[@]}"; do
            vector_thread_size=${vector_size[i]}
            team_thread_size=${team_size[i]}
            echo "nbatch = $N"
            echo "vector thread size = $vector_thread_size"
            echo "team thread size = $team_thread_size"
            output_wall_times="${tchem_outputs}/${experiment_name}/wall_times_nbatch_${N}_vecsize_${vector_thread_size}_teamThread_size_${team_thread_size}_number_of_particles${number_of_particles}.json"
            output_file="${tchem_outputs}/${experiment_name}/reaction_rates_nbatch_${N}_vecsize_${vector_thread_size}_teamThread_size_${team_thread_size}_number_of_particles${number_of_particles}.txt"
            echo "${tchem_outputs}"
            echo "${experiment_name}"
            echo "/reaction_rates_nbatch_${N}_vecsize_${vector_thread_size}_teamThread_size_${team_thread_size}.txt"
            echo "output file = ${output_file}"
            # The command is assembled as a string and run through eval so that the
            # leading VAR=value assignments are applied as environment for ${exec}.
            # --team_thread_size must receive the team size (it used to be given the
            # vector size), and --number_of_particles must be passed so the run
            # matches the particle count encoded in the output file names.
            run_this="OMP_NUM_THREADS=1 OMP_PLACES=threads OMP_PROC_BIND=close $exec \
                --batch_size=$N \
                --team_thread_size=$team_thread_size \
                --vector_thread_size=$vector_thread_size \
                --use_cloned_samples=$use_cloned_samples \
                --verbose=$verbose \
                --outputfile_times=$output_wall_times \
                --outputfile=$output_file \
                --number_of_particles=$number_of_particles \
                $scenario_n_inputs \
                $numerical_params "
            echo $run_this
            eval $run_this
            sleep 2
            # Convert the files matching ${machine_name}* to JSON, then delete them
            # so the next run starts clean.
            kp_json_writer "${machine_name}"* > "${tchem_outputs}/${experiment_name}/simple_timer_nbatch_${N}_vecsize_${vector_thread_size}_teamThread_size_${team_thread_size}_number_of_particles${number_of_particles}.json"
            sleep 2
            rm -rf -- "${machine_name}"*
        done
    done
done
37 changes: 37 additions & 0 deletions scripts/build_kokkostools.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@

# Compilers handed to CMake and the parallel-build flag handed to make.
MY_CC="gcc"
MY_CXX="g++"
JFLAG="-j 10"

# Repository, build and install trees all live under ./kokkos-tools
# (relative to the directory the script is started from).
REPO_BASE="${PWD}/kokkos-tools"
BUILD_BASE="${REPO_BASE}"
INSTALL_BASE="${REPO_BASE}"

# Clone the kokkos-tools repository into KOKKOSTOOLS_REPOSITORY_PATH.
# Does nothing (and returns 0) when that directory already exists and is not
# empty; otherwise returns the exit status of "git clone".
get_kokkostools (){
    echo "get kokkostools:"
    # The path is quoted inside $(ls -A ...) as well: unquoted, a path containing
    # spaces made ls fail, the directory looked empty and the clone was attempted.
    if [ -d "${KOKKOSTOOLS_REPOSITORY_PATH}" ] && [ -n "$(ls -A "${KOKKOSTOOLS_REPOSITORY_PATH}")" ]; then
        echo "${KOKKOSTOOLS_REPOSITORY_PATH} exists and is not empty ... aborting clone"
        return 0
    fi
    git clone https://github.com/kokkos/kokkos-tools.git "${KOKKOSTOOLS_REPOSITORY_PATH}"
}

# Configure, build and install Kokkos Tools.
# Reads the globals KOKKOSTOOLS_REPOSITORY_PATH (source tree),
# KOKKOSTOOLS_BUILD_PATH (build tree), KOKKOSTOOLS_INSTALL_PATH (install prefix),
# MY_CXX / MY_CC (compilers) and JFLAG (make parallelism).
# Returns non-zero as soon as one step fails; otherwise the status of "make".
# Note: leaves the shell's working directory in KOKKOSTOOLS_BUILD_PATH.
build_install_kokkostools(){
    echo "Building kokkos tools:"
    # -p: re-running the script must not fail because the directory already exists.
    mkdir -p "${KOKKOSTOOLS_BUILD_PATH}" || return 1
    # Never configure or build in the caller's directory if the cd fails.
    cd "${KOKKOSTOOLS_BUILD_PATH}" || return 1
    cmake \
        -D CMAKE_INSTALL_PREFIX="${KOKKOSTOOLS_INSTALL_PATH}" \
        -D CMAKE_CXX_COMPILER="${MY_CXX}" \
        -D CMAKE_C_COMPILER="${MY_CC}" \
        -D CMAKE_BUILD_TYPE=RELEASE \
        "${KOKKOSTOOLS_REPOSITORY_PATH}" || return 1
    # JFLAG is intentionally unquoted so that "-j 10" splits into two arguments.
    make ${JFLAG} install
}
# Locations of the source checkout, the build tree and the install prefix.
KOKKOSTOOLS_REPOSITORY_PATH="${REPO_BASE}/main"
KOKKOSTOOLS_BUILD_PATH="${BUILD_BASE}/build"
KOKKOSTOOLS_INSTALL_PATH="${INSTALL_BASE}/install"

# Fetch the sources; do not try to build when the clone failed.
get_kokkostools || exit 1
build_install_kokkostools

# Propagate the status of the build step to the caller.
exit $?
11 changes: 11 additions & 0 deletions src/core/TChem_AerosolModelData.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -225,6 +225,17 @@ int AerosolModelData::initChem(YAML::Node &root,
return 0;
}

void AerosolModelData::setNumberofParticles(const ordinal_type number_of_particles)
{
printf("-------------------------------------------------------\n");
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We should consider using some sort of preprocessor directive for debug print statements

printf("--------------------Warning----------------------------\n");
printf("Setting number of particles\n");
printf("Old value : %d \n", nParticles_);
nParticles_=number_of_particles;
printf("Current value : %d \n", nParticles_);
printf("-------------------------------------------------------\n");
}

void AerosolModelData::scenarioConditionParticles(const std::string &mechfile,
const ordinal_type nBatch,
real_type_2d_view_host& num_concentration,
Expand Down
4 changes: 3 additions & 1 deletion src/core/TChem_AerosolModelData.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ namespace TChem {
ordinal_type nSpec_gas_;
ordinal_type nConstSpec_gas_;
// number of particles
ordinal_type nParticles_;
ordinal_type nParticles_{-1};
// aerosol molecular weights and density
real_type_1d_dual_view molecular_weights_, aerosol_density_;
simpol_phase_transfer_type_1d_dual_view simpol_params_;
Expand All @@ -64,6 +64,8 @@ namespace TChem {
void initFile(const std::string &mechfile,
std::ostream& echofile);
ordinal_type initChem(YAML::Node& doc, std::ostream& echofile);
void setNumberofParticles(const ordinal_type number_of_particles);

void setGasParameters(const KineticModelData& kmd);
void scenarioConditionParticles(const std::string &mechfile,
const ordinal_type nBatch,
Expand Down
8 changes: 4 additions & 4 deletions src/core/TChem_AtmosphericChemistry.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -539,9 +539,9 @@ TChem::AtmChemistry::setScenarioConditionsPhotolysisReactions(const std::string&
if (nBatch_photo_reation == nBatch)
{
// std::cout << photo_rate_values << "\n";
for (int ibacth = 0; ibacth < nBatch; ++ibacth)
for (int ibatch = 0; ibatch < nBatch; ++ibatch)
{
photo_rates_host(ibacth, ireac) = photo_rate_values[ibacth].as<real_type>();
photo_rates_host(ibatch, ireac) = photo_rate_values[ibatch].as<real_type>();
}
} else {
printf("Error number of values in photo reaction is different than number of conditions");
Expand Down Expand Up @@ -610,9 +610,9 @@ if (root["external_forcing"]){

if (nBatch_ext_forcing == nBatch)
{
for (int ibacth = 0; ibacth < nBatch; ++ibacth)
for (int ibatch = 0; ibatch < nBatch; ++ibatch)
{
external_forcing_host(ibacth, sp_idx) = ext_forcing_values[ibacth].as<real_type>();
external_forcing_host(ibatch, sp_idx) = ext_forcing_values[ibatch].as<real_type>();
}
} else {
printf("Error number of values in external forcing is different than number of conditions");
Expand Down
4 changes: 2 additions & 2 deletions src/core/impl/TChem_Impl_Aerosol_RHS.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -84,10 +84,10 @@ struct Aerosol_RHS
work,
kmcd);
// 2. update RHS of gas and aerosol species
member.team_barrier();
// member.team_barrier();
using SIMPOL_single_particle_type = TChem::Impl::SIMPOL_single_particle<real_type, device_type >;
Kokkos::parallel_for(
Kokkos::TeamThreadRange(member, amcd.nParticles),
Kokkos::ThreadVectorRange(member, amcd.nParticles),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Not suggesting this needs to change, but I am curious: what does using ThreadVectorRange instead of TeamThreadRange do differently in this parallel_for?

[&](const ordinal_type& i_part) {
for (size_t i_simpol = 0; i_simpol < amcd.nSimpol_tran; i_simpol++)
{
Expand Down
3 changes: 2 additions & 1 deletion src/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ LIST(APPEND TCHEM_ATM_EXAMPLE_SOURCES
TChem_AtmosphericChemistryE3SM_CVODE.cpp
TChem_AerosolChemistry_CVODE.cpp
TChem_AerosolChemistry.cpp
TChem_AerosolChemistry_RHSs.cpp
)
IF(TCHEM_ATM_ENABLE_TPL_KOKKOSKERNELS)
LIST(APPEND TCHEM_ATM_EXAMPLE_SOURCES
Expand Down Expand Up @@ -72,4 +73,4 @@ TARGET_LINK_LIBRARIES(${TCHEM_ATM_EXAMPLE_EXE} ${TCHEM_ATM_LINK_LIBRARIES})
INSTALL(TARGETS ${TCHEM_ATM_EXAMPLE_EXE}
PERMISSIONS OWNER_EXECUTE OWNER_READ OWNER_WRITE
DESTINATION "${CMAKE_INSTALL_PREFIX}/${TCHEM_ATM_INSTALL_EXAMPLE_PATH}")
ENDIF()
ENDIF()
Loading
Loading