From 86b6965b6ca6aaea3f74c5e6ce07fe86e5dde736 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Thu, 29 Aug 2024 13:44:57 -0600 Subject: [PATCH 1/4] Update rrtmgp branch --- components/eam/src/physics/rrtmgp/external | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/components/eam/src/physics/rrtmgp/external b/components/eam/src/physics/rrtmgp/external index 02c2e03df33..8ff525eeed1 160000 --- a/components/eam/src/physics/rrtmgp/external +++ b/components/eam/src/physics/rrtmgp/external @@ -1 +1 @@ -Subproject commit 02c2e03df33edca6101670168ca187cb94c0768e +Subproject commit 8ff525eeed1d87a2ca6f251c4d16b46222c5554d From 5afe73dcde08ea9c68fc3e8c4486bce49a1c5251 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Thu, 29 Aug 2024 13:45:23 -0600 Subject: [PATCH 2/4] Set kokkos to be the default rrtmgp backend --- components/eamxx/CMakeLists.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/components/eamxx/CMakeLists.txt b/components/eamxx/CMakeLists.txt index 398efa3bc2d..ac2737f2fb6 100644 --- a/components/eamxx/CMakeLists.txt +++ b/components/eamxx/CMakeLists.txt @@ -210,8 +210,8 @@ endif() # #cmakedefine RRTMGP_EXPENSIVE_CHECKS option (SCREAM_RRTMGP_DEBUG "Turn on extra debug checks in RRTMGP" ${SCREAM_DEBUG}) -option(SCREAM_RRTMGP_ENABLE_YAKL "Use YAKL under rrtmgp" TRUE) -option(SCREAM_RRTMGP_ENABLE_KOKKOS "Use Kokkos under rrtmgp" FALSE) +option(SCREAM_RRTMGP_ENABLE_YAKL "Use YAKL under rrtmgp" FALSE) +option(SCREAM_RRTMGP_ENABLE_KOKKOS "Use Kokkos under rrtmgp" TRUE) if (SCREAM_RRTMGP_ENABLE_YAKL) add_definitions("-DRRTMGP_ENABLE_YAKL") endif() From 4c3e57509382eb889ac070036e5778918f304e6b Mon Sep 17 00:00:00 2001 From: James Foucar Date: Thu, 29 Aug 2024 17:12:20 -0600 Subject: [PATCH 3/4] Allow option to force rrtmgp inputs to be layoutleft --- .../rrtmgp/eamxx_rrtmgp_process_interface.cpp | 260 ++++++++++-------- .../rrtmgp/eamxx_rrtmgp_process_interface.hpp | 30 +- 2 files changed, 167 insertions(+), 123 deletions(-) diff --git a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp index e89e02fe301..67536538b48 100644 --- a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp +++ b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.cpp @@ -23,6 +23,48 @@ using KT = KokkosTypes; using ExeSpace = KT::ExeSpace; using MemberType = KT::MemberType; +namespace { + +struct ConvertToRrtmgpSubview +{ + int beg; + int ncol; + + template + View subview1d(const View& v) const { + return View(v, std::make_pair(beg, beg+ncol)); + } + + template + View subview2d_impl(const View& v, const int inner_dim) const { + return View(v, std::make_pair(beg, beg+ncol), std::make_pair(0, inner_dim)); + } + +#ifdef RRTMGP_LAYOUT_LEFT + template + BufferView subview2d(const FieldView&, const BufferView& buffer_view, const int inner_dim) const { + return BufferView(buffer_view, std::make_pair(0, ncol), Kokkos::ALL); + } +#else + // Be sure to trim the excess + // items from the field manager views due to simd packs. If we don't trim, then + // check_range_k will fail due to looking at unused values. Once rrtmgp can handle + // packs, this won't be necessary. + template + FieldView subview2d(const FieldView& field_view, const BufferView&, const int inner_dim) const { + return subview2d_impl(field_view, inner_dim); + } +#endif + + template + View subview3d(const View& v) const { + // The range assumes these are buffer views, not fields + return View(v, std::make_pair(0, ncol), Kokkos::ALL, Kokkos::ALL); + } +}; + +} + RRTMGPRadiation:: RRTMGPRadiation (const ekat::Comm& comm, const ekat::ParameterList& params) : AtmosphereProcess(comm, params) @@ -255,7 +297,12 @@ size_t RRTMGPRadiation::requested_buffer_size_in_bytes() const Buffer::num_3d_nlay_nswbands*m_col_chunk_size*(m_nlay)*m_nswbands + Buffer::num_3d_nlay_nlwbands*m_col_chunk_size*(m_nlay)*m_nlwbands + Buffer::num_3d_nlay_nswgpts*m_col_chunk_size*(m_nlay)*m_nswgpts + - Buffer::num_3d_nlay_nlwgpts*m_col_chunk_size*(m_nlay)*m_nlwgpts; + Buffer::num_3d_nlay_nlwgpts*m_col_chunk_size*(m_nlay)*m_nlwgpts * +#if defined(RRTMGP_ENABLE_YAKL) && defined(RRTMGP_ENABLE_KOKKOS) + 2; +#else + 1; +#endif return interface_request * sizeof(Real); } // RRTMGPRadiation::requested_buffer_size @@ -409,13 +456,7 @@ void RRTMGPRadiation::init_buffers(const ATMBufferManager &buffer_manager) mem += m_buffer.cld_tau_lw_bnd.totElems(); #endif - // During the transition to kokkos, the buffer views/arrays will point to the same memory, - // so care is needed to avoid repeating calculations in such a way that answers change. - // Example: buff_view(x) += foo; - // Stuff like this cannot be done twice when both kokkos and yakl are enabled #ifdef RRTMGP_ENABLE_KOKKOS - mem = reinterpret_cast(buffer_manager.get_memory()); - // 1d arrays m_buffer.mu0_k = decltype(m_buffer.mu0_k)(mem, m_col_chunk_size); mem += m_buffer.mu0_k.size(); @@ -847,7 +888,11 @@ void RRTMGPRadiation::run_impl (const double dt) { this->log(LogLevel::debug, "[RRTMGP::run_impl] Col chunk beg,end: " + std::to_string(beg) + ", " + std::to_string(beg+ncol) + "\n"); - + // d_tint and d_dz are used in eamxx calls and therefore + // must be layout right + ulrreal2dk d_tint = ulrreal2dk(m_buffer.d_tint.data(), m_col_chunk_size, m_nlay+1); + ulrreal2dk d_dz = ulrreal2dk(m_buffer.d_dz.data(), m_col_chunk_size, m_nlay); + auto d_mu0 = m_buffer.cosine_zenith; #ifdef RRTMGP_ENABLE_YAKL // Create YAKL arrays. RRTMGP expects YAKL arrays with styleFortran, i.e., data has ncol // as the fastest index. For this reason we must copy the data. @@ -920,123 +965,68 @@ void RRTMGPRadiation::run_impl (const double dt) { auto cld_tau_lw_gpt = subview_3d(m_buffer.cld_tau_lw_gpt); #endif #ifdef RRTMGP_ENABLE_KOKKOS - // If YAKL is on, we don't want aliased memory in both the yakl and kokos - // subviews, so make new views and deep_copy. Also, be sure to trim the excess - // items from the field manager views due to simd packs. If we don't trim, then - // check_range_k will fail due to looking at unused values. Once rrtmgp can handle - // packs, this won't be necessary. - auto subview_1dk = [&](const ureal1dk& v) -> ureal1dk { - ureal1dk subv(v, std::make_pair(beg, beg+ncol)); -#ifdef RRTMGP_ENABLE_YAKL - real1dk rv(v.label(), ncol); - Kokkos::deep_copy(rv, subv); - return rv; -#else - return subv; -#endif - }; - auto subview_1dkc = [&](const cureal1dk& v) -> cureal1dk { - cureal1dk subv(v, std::make_pair(beg, beg+ncol)); -#ifdef RRTMGP_ENABLE_YAKL - creal1dk rv(v.label(), ncol); - Kokkos::deep_copy(rv, subv); - return rv; -#else - return subv; -#endif - }; - auto subview_2dk = [&](const ureal2dk& v, const int inner_dim) -> ureal2dk { - ureal2dk subv(v, std::make_pair(beg, beg+ncol), std::make_pair(0, inner_dim)); -#ifdef RRTMGP_ENABLE_YAKL - real2dk rv(v.label(), ncol, v.extent(1)); - Kokkos::deep_copy(rv, subv); - return rv; -#else - return subv; -#endif - }; - auto subview_2dkc = [&](const cureal2dk& v, const int inner_dim) -> cureal2dk { - cureal2dk subv(v, std::make_pair(beg, beg+ncol), std::make_pair(0, inner_dim)); -#ifdef RRTMGP_ENABLE_YAKL - creal2dk rv(v.label(), ncol, v.extent(1)); - Kokkos::deep_copy(rv, subv); - return rv; -#else - return subv; -#endif - }; - auto subview_3dk = [&](const ureal3dk& v) -> ureal3dk { - ureal3dk subv(v, std::make_pair(0, ncol), Kokkos::ALL, Kokkos::ALL); // The range assumes these are buffer views, not fields -#ifdef RRTMGP_ENABLE_YAKL - real3dk rv(v.label(), ncol, v.extent(1), v.extent(2)); - Kokkos::deep_copy(rv, subv); - return rv; -#else - return subv; -#endif - }; + ConvertToRrtmgpSubview conv = {beg, ncol}; // Note, ncol will not necessary be m_col_chunk_size because the number of cols // will not always be evenly divided by m_col_chunk_size. In most cases, the // extra space will not cause any problems, but it does sometimes. - auto p_lay_k = subview_2dkc(d_pmid, m_nlay); - auto t_lay_k = subview_2dkc(d_tmid, m_nlay); - auto p_lev_k = subview_2dkc(d_pint, m_nlay+1); - auto p_del_k = subview_2dkc(d_pdel, m_nlay); + auto p_lay_k = conv.subview2d(d_pmid, m_buffer.p_lay_k, m_nlay); + auto t_lay_k = conv.subview2d(d_tmid, m_buffer.t_lay_k, m_nlay); + auto p_lev_k = conv.subview2d(d_pint, m_buffer.p_lev_k, m_nlay+1); + auto z_del_k = conv.subview2d(d_dz, m_buffer.z_del_k, m_nlay); + auto p_del_k = conv.subview2d(d_pdel, m_buffer.p_del_k, m_nlay); + auto t_lev_k = conv.subview2d(d_tint, m_buffer.t_lev_k, m_nlay+1); auto sfc_alb_dir_k = m_buffer.sfc_alb_dir_k; auto sfc_alb_dif_k = m_buffer.sfc_alb_dif_k; - auto sfc_alb_dir_vis_k = subview_1dkc(d_sfc_alb_dir_vis); - auto sfc_alb_dir_nir_k = subview_1dkc(d_sfc_alb_dir_nir); - auto sfc_alb_dif_vis_k = subview_1dkc(d_sfc_alb_dif_vis); - auto sfc_alb_dif_nir_k = subview_1dkc(d_sfc_alb_dif_nir); - auto qc_k = subview_2dkc(d_qc, m_nlay); - auto nc_k = subview_2dkc(d_nc, m_nlay); - auto qi_k = subview_2dkc(d_qi, m_nlay); + auto sfc_alb_dir_vis_k = conv.subview1d(d_sfc_alb_dir_vis); + auto sfc_alb_dir_nir_k = conv.subview1d(d_sfc_alb_dir_nir); + auto sfc_alb_dif_vis_k = conv.subview1d(d_sfc_alb_dif_vis); + auto sfc_alb_dif_nir_k = conv.subview1d(d_sfc_alb_dif_nir); + auto qc_k = conv.subview2d(d_qc, m_buffer.qc_k, m_nlay); + auto nc_k = conv.subview2d(d_nc, m_buffer.nc_k, m_nlay); + auto qi_k = conv.subview2d(d_qi, m_buffer.qi_k, m_nlay); auto cldfrac_tot_k = m_buffer.cldfrac_tot_k; - auto rel_k = subview_2dkc(d_rel, m_nlay); - auto rei_k = subview_2dkc(d_rei, m_nlay); - auto sw_flux_up_k = subview_2dk(d_sw_flux_up, m_nlay+1); - auto sw_flux_dn_k = subview_2dk(d_sw_flux_dn, m_nlay+1); - auto sw_flux_dn_dir_k = subview_2dk(d_sw_flux_dn_dir, m_nlay+1); - auto lw_flux_up_k = subview_2dk(d_lw_flux_up, m_nlay+1); - auto lw_flux_dn_k = subview_2dk(d_lw_flux_dn, m_nlay+1); - auto sw_clnclrsky_flux_up_k = subview_2dk(d_sw_clnclrsky_flux_up, m_nlay+1); - auto sw_clnclrsky_flux_dn_k = subview_2dk(d_sw_clnclrsky_flux_dn, m_nlay+1); - auto sw_clnclrsky_flux_dn_dir_k = subview_2dk(d_sw_clnclrsky_flux_dn_dir, m_nlay+1); - auto sw_clrsky_flux_up_k = subview_2dk(d_sw_clrsky_flux_up, m_nlay+1); - auto sw_clrsky_flux_dn_k = subview_2dk(d_sw_clrsky_flux_dn, m_nlay+1); - auto sw_clrsky_flux_dn_dir_k = subview_2dk(d_sw_clrsky_flux_dn_dir, m_nlay+1); - auto sw_clnsky_flux_up_k = subview_2dk(d_sw_clnsky_flux_up, m_nlay+1); - auto sw_clnsky_flux_dn_k = subview_2dk(d_sw_clnsky_flux_dn, m_nlay+1); - auto sw_clnsky_flux_dn_dir_k = subview_2dk(d_sw_clnsky_flux_dn_dir, m_nlay+1); - auto lw_clnclrsky_flux_up_k = subview_2dk(d_lw_clnclrsky_flux_up, m_nlay+1); - auto lw_clnclrsky_flux_dn_k = subview_2dk(d_lw_clnclrsky_flux_dn, m_nlay+1); - auto lw_clrsky_flux_up_k = subview_2dk(d_lw_clrsky_flux_up, m_nlay+1); - auto lw_clrsky_flux_dn_k = subview_2dk(d_lw_clrsky_flux_dn, m_nlay+1); - auto lw_clnsky_flux_up_k = subview_2dk(d_lw_clnsky_flux_up, m_nlay+1); - auto lw_clnsky_flux_dn_k = subview_2dk(d_lw_clnsky_flux_dn, m_nlay+1); + auto rel_k = conv.subview2d(d_rel, m_buffer.eff_radius_qc_k, m_nlay); + auto rei_k = conv.subview2d(d_rei, m_buffer.eff_radius_qi_k, m_nlay); + auto sw_flux_up_k = conv.subview2d(d_sw_flux_up, m_buffer.sw_flux_up_k, m_nlay+1); + auto sw_flux_dn_k = conv.subview2d(d_sw_flux_dn, m_buffer.sw_flux_dn_k, m_nlay+1); + auto sw_flux_dn_dir_k = conv.subview2d(d_sw_flux_dn_dir, m_buffer.sw_flux_dn_dir_k, m_nlay+1); + auto lw_flux_up_k = conv.subview2d(d_lw_flux_up, m_buffer.lw_flux_up_k, m_nlay+1); + auto lw_flux_dn_k = conv.subview2d(d_lw_flux_dn, m_buffer.lw_flux_dn_k, m_nlay+1); + auto sw_clnclrsky_flux_up_k = conv.subview2d(d_sw_clnclrsky_flux_up, m_buffer.sw_clnclrsky_flux_up_k, m_nlay+1); + auto sw_clnclrsky_flux_dn_k = conv.subview2d(d_sw_clnclrsky_flux_dn, m_buffer.sw_clnclrsky_flux_dn_k, m_nlay+1); + auto sw_clnclrsky_flux_dn_dir_k = conv.subview2d(d_sw_clnclrsky_flux_dn_dir, m_buffer.sw_clnclrsky_flux_dn_dir_k, m_nlay+1); + auto sw_clrsky_flux_up_k = conv.subview2d(d_sw_clrsky_flux_up, m_buffer.sw_clrsky_flux_up_k, m_nlay+1); + auto sw_clrsky_flux_dn_k = conv.subview2d(d_sw_clrsky_flux_dn, m_buffer.sw_clrsky_flux_dn_k, m_nlay+1); + auto sw_clrsky_flux_dn_dir_k = conv.subview2d(d_sw_clrsky_flux_dn_dir, m_buffer.sw_clrsky_flux_dn_dir_k, m_nlay+1); + auto sw_clnsky_flux_up_k = conv.subview2d(d_sw_clnsky_flux_up, m_buffer.sw_clnsky_flux_up_k, m_nlay+1); + auto sw_clnsky_flux_dn_k = conv.subview2d(d_sw_clnsky_flux_dn, m_buffer.sw_clnsky_flux_dn_k, m_nlay+1); + auto sw_clnsky_flux_dn_dir_k = conv.subview2d(d_sw_clnsky_flux_dn_dir, m_buffer.sw_clnsky_flux_dn_dir_k, m_nlay+1); + auto lw_clnclrsky_flux_up_k = conv.subview2d(d_lw_clnclrsky_flux_up, m_buffer.lw_clnclrsky_flux_up_k, m_nlay+1); + auto lw_clnclrsky_flux_dn_k = conv.subview2d(d_lw_clnclrsky_flux_dn, m_buffer.lw_clnclrsky_flux_dn_k, m_nlay+1); + auto lw_clrsky_flux_up_k = conv.subview2d(d_lw_clrsky_flux_up, m_buffer.lw_clrsky_flux_up_k, m_nlay+1); + auto lw_clrsky_flux_dn_k = conv.subview2d(d_lw_clrsky_flux_dn, m_buffer.lw_clrsky_flux_dn_k, m_nlay+1); + auto lw_clnsky_flux_up_k = conv.subview2d(d_lw_clnsky_flux_up, m_buffer.lw_clnsky_flux_up_k, m_nlay+1); + auto lw_clnsky_flux_dn_k = conv.subview2d(d_lw_clnsky_flux_dn, m_buffer.lw_clnsky_flux_dn_k, m_nlay+1); auto sw_bnd_flux_up_k = m_buffer.sw_bnd_flux_up_k; auto sw_bnd_flux_dn_k = m_buffer.sw_bnd_flux_dn_k; auto sw_bnd_flux_dir_k = m_buffer.sw_bnd_flux_dir_k; auto sw_bnd_flux_dif_k = m_buffer.sw_bnd_flux_dif_k; auto lw_bnd_flux_up_k = m_buffer.lw_bnd_flux_up_k; auto lw_bnd_flux_dn_k = m_buffer.lw_bnd_flux_dn_k; - auto sfc_flux_dir_vis_k = subview_1dk(d_sfc_flux_dir_vis); - auto sfc_flux_dir_nir_k = subview_1dk(d_sfc_flux_dir_nir); - auto sfc_flux_dif_vis_k = subview_1dk(d_sfc_flux_dif_vis); - auto sfc_flux_dif_nir_k = subview_1dk(d_sfc_flux_dif_nir); + auto sfc_flux_dir_vis_k = conv.subview1d(d_sfc_flux_dir_vis); + auto sfc_flux_dir_nir_k = conv.subview1d(d_sfc_flux_dir_nir); + auto sfc_flux_dif_vis_k = conv.subview1d(d_sfc_flux_dif_vis); + auto sfc_flux_dif_nir_k = conv.subview1d(d_sfc_flux_dif_nir); auto aero_tau_sw_k = m_buffer.aero_tau_sw_k; auto aero_ssa_sw_k = m_buffer.aero_ssa_sw_k; auto aero_g_sw_k = m_buffer.aero_g_sw_k; auto aero_tau_lw_k = m_buffer.aero_tau_lw_k; - auto cld_tau_sw_bnd_k = subview_3dk(m_buffer.cld_tau_sw_bnd_k); - auto cld_tau_lw_bnd_k = subview_3dk(m_buffer.cld_tau_lw_bnd_k); - auto cld_tau_sw_gpt_k = subview_3dk(m_buffer.cld_tau_sw_gpt_k); - auto cld_tau_lw_gpt_k = subview_3dk(m_buffer.cld_tau_lw_gpt_k); + auto cld_tau_sw_bnd_k = conv.subview3d(m_buffer.cld_tau_sw_bnd_k); + auto cld_tau_lw_bnd_k = conv.subview3d(m_buffer.cld_tau_lw_bnd_k); + auto cld_tau_sw_gpt_k = conv.subview3d(m_buffer.cld_tau_sw_gpt_k); + auto cld_tau_lw_gpt_k = conv.subview3d(m_buffer.cld_tau_lw_gpt_k); #endif - auto d_tint = m_buffer.d_tint; - auto d_dz = m_buffer.d_dz; - auto d_mu0 = m_buffer.cosine_zenith; // Set gas concs to "view" only the first ncol columns #ifdef RRTMGP_ENABLE_YAKL @@ -1045,7 +1035,7 @@ void RRTMGPRadiation::run_impl (const double dt) { #endif #ifdef RRTMGP_ENABLE_KOKKOS m_gas_concs_k.ncol = ncol; - m_gas_concs_k.concs = subview_3dk(gas_concs_k); + m_gas_concs_k.concs = conv.subview3d(gas_concs_k); #endif // Copy data from the FieldManager to the YAKL arrays @@ -1156,6 +1146,26 @@ void RRTMGPRadiation::run_impl (const double dt) { } #endif #ifdef RRTMGP_ENABLE_KOKKOS +#ifdef RRTMGP_LAYOUT_LEFT + // Copy to layout left buffer views + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { + p_lay_k(i,k) = d_pmid(icol,k); + t_lay_k(i,k) = d_tmid(icol,k); + z_del_k(i,k) = d_dz(i,k); + p_del_k(i,k) = d_pdel(icol,k); + qc_k(i,k) = d_qc(icol,k); + nc_k(i,k) = d_nc(icol,k); + qi_k(i,k) = d_qi(icol,k); + rel_k(i,k) = d_rel(icol,k); + rei_k(i,k) = d_rei(icol,k); + p_lev_k(i,k) = d_pint(icol,k); + t_lev_k(i,k) = d_tint(i,k); + }); + + p_lev_k(i,nlay) = d_pint(icol,nlay); + t_lev_k(i,nlay) = d_tint(i,nlay); +#endif + // Note that RRTMGP expects ordering (col,lay,bnd) but the FM keeps things in (col,bnd,lay) order if (do_aerosol_rad) { Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nswbands*nlay), [&] (const int&idx) { @@ -1215,7 +1225,7 @@ void RRTMGPRadiation::run_impl (const double dt) { auto f = name=="o3" ? get_field_in(full_name) : get_field_out(full_name); auto d_vmr = f.get_view(); #ifdef RRTMGP_ENABLE_KOKKOS - auto tmp2d_k = subview_2dkc(d_vmr, m_nlay); + auto tmp2d_k = conv.subview2d_impl(d_vmr, m_nlay); #endif #ifdef RRTMGP_ENABLE_YAKL @@ -1328,11 +1338,8 @@ void RRTMGPRadiation::run_impl (const double dt) { iwp(i+1,k+1) *= 1e3; #endif #ifdef RRTMGP_ENABLE_KOKKOS -#ifndef RRTMGP_ENABLE_YAKL - // lwp and lwp_k point to the same memory lwp_k(i,k) *= 1e3; iwp_k(i,k) *= 1e3; -#endif #endif }); }); @@ -1385,7 +1392,7 @@ void RRTMGPRadiation::run_impl (const double dt) { #ifdef RRTMGP_ENABLE_KOKKOS interface_t::rrtmgp_main( ncol, m_nlay, - p_lay_k, t_lay_k, p_lev_k, d_tint, + p_lay_k, t_lay_k, p_lev_k, t_lev_k, m_gas_concs_k, sfc_alb_dir_k, sfc_alb_dif_k, d_mu0, lwp_k, iwp_k, rel_k, rei_k, cldfrac_tot_k, @@ -1592,7 +1599,7 @@ void RRTMGPRadiation::run_impl (const double dt) { real1dk eff_radius_qi_at_cldtop_k (d_eff_radius_qi_at_cldtop.data() + m_col_chunk_beg[ic], ncol); interface_t::compute_aerocom_cloudtop( - ncol, nlay, t_lay_k, p_lay_k, p_del_k, d_dz, qc_k, qi_k, rel_k, rei_k, cldfrac_tot_k, + ncol, nlay, t_lay_k, p_lay_k, p_del_k, z_del_k, qc_k, qi_k, rel_k, rei_k, cldfrac_tot_k, nc_k, T_mid_at_cldtop_k, p_mid_at_cldtop_k, cldfrac_ice_at_cldtop_k, cldfrac_liq_at_cldtop_k, cldfrac_tot_at_cldtop_k, cdnc_at_cldtop_k, eff_radius_qc_at_cldtop_k, eff_radius_qi_at_cldtop_k); @@ -1658,6 +1665,31 @@ void RRTMGPRadiation::run_impl (const double dt) { const int icol = i + beg; d_sfc_flux_sw_net(icol) = sw_flux_dn_k(i,kbot_k) - sw_flux_up_k(i,kbot_k); d_sfc_flux_lw_dn(icol) = lw_flux_dn_k(i,kbot_k); +#ifdef RRTMGP_LAYOUT_LEFT + // Copy from layout left buffer views back to layout right fields + Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay+1), [&] (const int& k) { + d_sw_flux_up(icol,k) = sw_flux_up_k(i,k); + d_sw_flux_dn(icol,k) = sw_flux_dn_k(i,k); + d_sw_flux_dn_dir(icol,k) = sw_flux_dn_dir_k(i,k); + d_lw_flux_up(icol,k) = lw_flux_up_k(i,k); + d_lw_flux_dn(icol,k) = lw_flux_dn_k(i,k); + d_sw_clnclrsky_flux_up(icol,k) = sw_clnclrsky_flux_up_k(i,k); + d_sw_clnclrsky_flux_dn(icol,k) = sw_clnclrsky_flux_dn_k(i,k); + d_sw_clnclrsky_flux_dn_dir(icol,k) = sw_clnclrsky_flux_dn_dir_k(i,k); + d_sw_clrsky_flux_up(icol,k) = sw_clrsky_flux_up_k(i,k); + d_sw_clrsky_flux_dn(icol,k) = sw_clrsky_flux_dn_k(i,k); + d_sw_clrsky_flux_dn_dir(icol,k) = sw_clrsky_flux_dn_dir_k(i,k); + d_sw_clnsky_flux_up(icol,k) = sw_clnsky_flux_up_k(i,k); + d_sw_clnsky_flux_dn(icol,k) = sw_clnsky_flux_dn_k(i,k); + d_sw_clnsky_flux_dn_dir(icol,k) = sw_clnsky_flux_dn_dir_k(i,k); + d_lw_clnclrsky_flux_up(icol,k) = lw_clnclrsky_flux_up_k(i,k); + d_lw_clnclrsky_flux_dn(icol,k) = lw_clnclrsky_flux_dn_k(i,k); + d_lw_clrsky_flux_up(icol,k) = lw_clrsky_flux_up_k(i,k); + d_lw_clrsky_flux_dn(icol,k) = lw_clrsky_flux_dn_k(i,k); + d_lw_clnsky_flux_up(icol,k) = lw_clnsky_flux_up_k(i,k); + d_lw_clnsky_flux_dn(icol,k) = lw_clnsky_flux_dn_k(i,k); + }); +#endif // Extract optical properties for COSP Kokkos::parallel_for(Kokkos::TeamVectorRange(team, nlay), [&] (const int& k) { d_dtau067(icol,k) = cld_tau_sw_bnd_k(i,k,idx_067_k); diff --git a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp index e036e7ac08c..329c7eeaba8 100644 --- a/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp +++ b/components/eamxx/src/physics/rrtmgp/eamxx_rrtmgp_process_interface.hpp @@ -14,15 +14,26 @@ namespace scream { * exactly ONE instance of this class in its list of subcomponents. */ +// rrtmgp is performance tuned for layout left views but will accept any +// view. We probably want to stick with layout left views for performance +// reasons even though this requires us to make copies of our fields (they +// are layout right). +#define RRTMGP_LAYOUT_LEFT + class RRTMGPRadiation : public AtmosphereProcess { public: - using KT = ekat::KokkosTypes; - using real1dk = typename KT::template view_1d; - using real2dk = typename KT::template view_2d; - using real3dk = typename KT::template view_3d; - using creal1dk = typename KT::template view_1d; - using creal2dk = typename KT::template view_2d; - using creal3dk = typename KT::template view_3d; + using KT = ekat::KokkosTypes; +#ifdef RRTMGP_LAYOUT_LEFT + using layout_t = Kokkos::LayoutLeft; +#else + using layout_t = typename ekat::KokkosTypes::Layout; +#endif + using real1dk = Kokkos::View; + using real2dk = Kokkos::View; + using real3dk = Kokkos::View; + using creal1dk = Kokkos::View; + using creal2dk = Kokkos::View; + using creal3dk = Kokkos::View; using ureal1dk = Unmanaged; using ureal2dk = Unmanaged; using ureal3dk = Unmanaged; @@ -32,7 +43,8 @@ class RRTMGPRadiation : public AtmosphereProcess { using ci_string = ekat::CaseInsensitiveString; - using layout_t = typename ekat::KokkosTypes::Layout; + using lrreal2dk = typename KT::template view_2d; + using ulrreal2dk = Unmanaged; #ifdef RRTMGP_ENABLE_KOKKOS using interface_t = rrtmgp::rrtmgp_interface; @@ -97,7 +109,7 @@ class RRTMGPRadiation : public AtmosphereProcess { // These are the gases that we keep track of int m_ngas; std::vector m_gas_names; - real1dk m_gas_mol_weights; + real1dk m_gas_mol_weights; #ifdef RRTMGP_ENABLE_YAKL GasConcs m_gas_concs; #endif From c346b79fe9162b73d897e9ace9e3e6d1a8d05935 Mon Sep 17 00:00:00 2001 From: James Foucar Date: Fri, 6 Sep 2024 14:22:08 -0600 Subject: [PATCH 4/4] Test fix --- .../single-process/rrtmgp/rrtmgp_standalone_unit.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp b/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp index 75feeb1b8dd..dc1ece59b28 100644 --- a/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp +++ b/components/eamxx/tests/single-process/rrtmgp/rrtmgp_standalone_unit.cpp @@ -422,9 +422,14 @@ TEST_CASE("rrtmgp_scream_standalone", "") { #endif #ifdef RRTMGP_ENABLE_KOKKOS TEST_CASE("rrtmgp_scream_standalone_k", "") { - using interface_t = scream::rrtmgp::rrtmgp_interface<>; +#ifdef RRTMGP_LAYOUT_LEFT + using layout_t = Kokkos::LayoutLeft; +#else + using layout_t = typename ekat::KokkosTypes::Layout; +#endif + using interface_t = scream::rrtmgp::rrtmgp_interface; using MDRP = interface_t::MDRP; - using utils_t = rrtmgpTest::rrtmgp_test_utils<>; + using utils_t = rrtmgpTest::rrtmgp_test_utils; using real1dk = interface_t::view_t; using real2dk = interface_t::view_t; using real3dk = interface_t::view_t;