From 0d2ffc4a7359012fae680706d25a7020971109da Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Jul 2024 11:23:29 -0600 Subject: [PATCH 01/18] Refactor for multiple transports --- CMakeLists.txt | 2 + perf_tests/CMakeLists.txt | 75 +++++++------- perf_tests/test_2dhalo.cpp | 30 +++--- perf_tests/test_main.cpp | 2 +- perf_tests/test_osu_latency_isendirecv.cpp | 14 +-- perf_tests/test_osu_latency_sendrecv.cpp | 4 +- perf_tests/test_sendrecv.cpp | 8 +- perf_tests/test_utils.hpp | 2 +- src/KokkosComm.hpp | 43 ++++---- src/KokkosComm_collective.hpp | 19 ++-- src/{impl => }/KokkosComm_concepts.hpp | 25 ++--- src/KokkosComm_config.hpp | 19 ++++ src/KokkosComm_fwd.hpp | 52 ++++++++++ src/KokkosComm_point_to_point.hpp | 50 ++++++++++ src/KokkosComm_request.hpp | 67 ------------- src/impl/KokkosComm_contiguous.hpp | 63 ++++++++++++ src/impl/KokkosComm_include_mpi.hpp | 12 --- src/impl/KokkosComm_irecv.hpp | 55 ----------- src/impl/KokkosComm_isend.hpp | 96 ------------------ src/mpi/KokkosComm_mpi.hpp | 48 +++++++++ .../KokkosComm_mpi_allgather.hpp} | 29 +++--- .../KokkosComm_mpi_alltoall.hpp} | 12 +-- .../KokkosComm_mpi_barrier.hpp} | 28 +++--- src/mpi/KokkosComm_mpi_commmode.hpp | 41 ++++++++ src/mpi/KokkosComm_mpi_handle.hpp | 65 ++++++++++++ src/mpi/KokkosComm_mpi_irecv.hpp | 69 +++++++++++++ src/mpi/KokkosComm_mpi_isend.hpp | 98 +++++++++++++++++++ .../KokkosComm_mpi_recv.hpp} | 15 ++- .../KokkosComm_mpi_reduce.hpp} | 25 +++-- src/mpi/KokkosComm_mpi_req.hpp | 96 ++++++++++++++++++ .../KokkosComm_mpi_send.hpp} | 29 +++--- src/mpi/impl/KokkosComm_include_mpi.hpp | 28 ++++++ src/{ => mpi/impl}/KokkosComm_pack_traits.hpp | 9 +- src/{ => mpi}/impl/KokkosComm_packer.hpp | 2 + src/{ => mpi}/impl/KokkosComm_types.hpp | 39 ++++++++ unit_tests/CMakeLists.txt | 49 +++++++--- unit_tests/{ => mpi}/test_allgather.cpp | 4 +- unit_tests/{ => mpi}/test_alltoall.cpp | 0 unit_tests/{ => mpi}/test_gtest_mpi.cpp | 0 unit_tests/{ => mpi}/test_isendrecv.cpp | 52 +++++----- unit_tests/{ => mpi}/test_mpi.cpp | 0 unit_tests/{ => mpi}/test_reduce.cpp | 2 +- unit_tests/{ => mpi}/test_sendrecv.cpp | 28 +++--- unit_tests/test_barrier.cpp | 7 +- unit_tests/test_isendirecv.cpp | 46 ++++----- unit_tests/test_main.cpp | 2 +- 46 files changed, 942 insertions(+), 519 deletions(-) rename src/{impl => }/KokkosComm_concepts.hpp (63%) create mode 100644 src/KokkosComm_config.hpp create mode 100644 src/KokkosComm_fwd.hpp create mode 100644 src/KokkosComm_point_to_point.hpp delete mode 100644 src/KokkosComm_request.hpp create mode 100644 src/impl/KokkosComm_contiguous.hpp delete mode 100644 src/impl/KokkosComm_include_mpi.hpp delete mode 100644 src/impl/KokkosComm_irecv.hpp delete mode 100644 src/impl/KokkosComm_isend.hpp create mode 100644 src/mpi/KokkosComm_mpi.hpp rename src/{impl/KokkosComm_allgather.hpp => mpi/KokkosComm_mpi_allgather.hpp} (74%) rename src/{impl/KokkosComm_alltoall.hpp => mpi/KokkosComm_mpi_alltoall.hpp} (93%) rename src/{impl/KokkosComm_barrier.hpp => mpi/KokkosComm_mpi_barrier.hpp} (61%) create mode 100644 src/mpi/KokkosComm_mpi_commmode.hpp create mode 100644 src/mpi/KokkosComm_mpi_handle.hpp create mode 100644 src/mpi/KokkosComm_mpi_irecv.hpp create mode 100644 src/mpi/KokkosComm_mpi_isend.hpp rename src/{impl/KokkosComm_recv.hpp => mpi/KokkosComm_mpi_recv.hpp} (88%) rename src/{impl/KokkosComm_reduce.hpp => mpi/KokkosComm_mpi_reduce.hpp} (79%) create mode 100644 src/mpi/KokkosComm_mpi_req.hpp rename src/{impl/KokkosComm_send.hpp => mpi/KokkosComm_mpi_send.hpp} (81%) create mode 100644 src/mpi/impl/KokkosComm_include_mpi.hpp rename src/{ => mpi/impl}/KokkosComm_pack_traits.hpp (76%) rename src/{ => mpi}/impl/KokkosComm_packer.hpp (98%) rename src/{ => mpi}/impl/KokkosComm_types.hpp (75%) rename unit_tests/{ => mpi}/test_allgather.cpp (92%) rename unit_tests/{ => mpi}/test_alltoall.cpp (100%) rename unit_tests/{ => mpi}/test_gtest_mpi.cpp (100%) rename unit_tests/{ => mpi}/test_isendrecv.cpp (59%) rename unit_tests/{ => mpi}/test_mpi.cpp (100%) rename unit_tests/{ => mpi}/test_reduce.cpp (94%) rename unit_tests/{ => mpi}/test_sendrecv.cpp (68%) diff --git a/CMakeLists.txt b/CMakeLists.txt index b5ef9c82..1644baf4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,11 +6,13 @@ project(KokkosComm VERSION 0.0.2) option(KokkosComm_ENABLE_PERFTESTS "Build KokkosComm perf tests" OFF) option(KokkosComm_ENABLE_TESTS "Build KokkosComm perf tests" OFF) +option(KokkosComm_ENABLE_MPI "Build KokkosComm with MPI transport" ON) ## resolve options set(KOKKOSCOMM_ENABLE_PERFTESTS ${KokkosComm_ENABLE_PERFTESTS} CACHE BOOL "" FORCE) set(KOKKOSCOMM_ENABLE_TESTS ${KokkosComm_ENABLE_TESTS} CACHE BOOL "" FORCE) +set(KOKKOSCOMM_ENABLE_MPI ${KokkosComm_ENABLE_MPI} CACHE BOOL "" FORCE) find_package(Kokkos REQUIRED) find_package(MPI REQUIRED) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index e45ed897..6940f1cc 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -1,5 +1,6 @@ cmake_minimum_required(VERSION 3.12) # same as Kokkos Comm project(KokkosCommPerfTests VERSION 0.0.2) + enable_testing() # Treat the perf tests as a separate project @@ -9,39 +10,41 @@ if (NOT TARGET KokkosComm::KokkosComm) find_package(KokkosComm REQUIRED) endif() -include(FetchContent) - -# Avoid warning about DOWNLOAD_EXTRACT_TIMESTAMP in CMake 3.24: -if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") -cmake_policy(SET CMP0135 NEW) -endif() - -set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) -FetchContent_Declare( - benchmark - URL https://github.com/google/benchmark/archive/refs/tags/v1.8.3.zip -) -# FetchContent_MakeAvailable(benchmark) -# was making install install benchmark as well -# EXCLUDE_FROM_ALL here seems to be the magic -if (NOT benchmark_POPULATED) - FetchContent_Populate(benchmark) - add_subdirectory(${benchmark_SOURCE_DIR} ${benchmark_BINARY_DIR} EXCLUDE_FROM_ALL) -endif() -unset(BENCHMARK_ENABLE_TESTING) - - -add_executable(perf_test-main test_main.cpp - test_sendrecv.cpp - test_2dhalo.cpp - test_osu_latency_sendrecv.cpp - test_osu_latency_isendirecv.cpp -) -if(KOKKOSCOMM_ENABLE_TESTS) - kokkoscomm_add_cxx_flags(TARGET perf_test-main) -endif() -target_link_libraries(perf_test-main KokkosComm::KokkosComm benchmark::benchmark) -add_test(NAME perf_test-main - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./perf_test-main) - - +if (KOKKOSCOMM_ENABLE_MPI) + + include(FetchContent) + + # Avoid warning about DOWNLOAD_EXTRACT_TIMESTAMP in CMake 3.24: + if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") + cmake_policy(SET CMP0135 NEW) + endif() + + set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) + FetchContent_Declare( + benchmark + URL https://github.com/google/benchmark/archive/refs/tags/v1.8.3.zip + ) + # FetchContent_MakeAvailable(benchmark) + # was making install install benchmark as well + # EXCLUDE_FROM_ALL here seems to be the magic + if (NOT benchmark_POPULATED) + FetchContent_Populate(benchmark) + add_subdirectory(${benchmark_SOURCE_DIR} ${benchmark_BINARY_DIR} EXCLUDE_FROM_ALL) + endif() + unset(BENCHMARK_ENABLE_TESTING) + + + add_executable(perf_test-main test_main.cpp + test_sendrecv.cpp + test_2dhalo.cpp + test_osu_latency_sendrecv.cpp + test_osu_latency_isendirecv.cpp + ) + if(KOKKOSCOMM_ENABLE_TESTS) + kokkoscomm_add_cxx_flags(TARGET perf_test-main) + endif() + target_link_libraries(perf_test-main KokkosComm::KokkosComm benchmark::benchmark) + add_test(NAME perf_test-main + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./perf_test-main) + +endif(KOKKOSCOMM_ENABLE_MPI) diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/test_2dhalo.cpp index 1203d4e0..8c7562b8 100644 --- a/perf_tests/test_2dhalo.cpp +++ b/perf_tests/test_2dhalo.cpp @@ -22,9 +22,11 @@ void noop(benchmark::State, MPI_Comm) {} -template -void send_recv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int nx, int ny, int rx, int ry, - int rs, const View &v) { +template +void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int nx, int ny, int rx, int ry, int rs, + const View &v) { + KokkosComm::Handle<> h{comm}; + // 2D index of nbrs in minus and plus direction (periodic) const int xm1 = (rx + rs - 1) % rs; const int ym1 = (ry + rs - 1) % rs; @@ -46,22 +48,20 @@ void send_recv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space auto ym1_s = Kokkos::subview(v, make_pair(1, nx + 1), 1, Kokkos::ALL); auto ym1_r = Kokkos::subview(v, make_pair(1, nx + 1), 0, Kokkos::ALL); - std::vector reqs; + std::vector> reqs; // std::cerr << get_rank(rx, ry) << " -> " << get_rank(xp1, ry) << "\n"; - reqs.push_back(KokkosComm::isend(mode, space, xp1_s, get_rank(xp1, ry), 0, comm)); - reqs.push_back(KokkosComm::isend(mode, space, xm1_s, get_rank(xm1, ry), 1, comm)); - reqs.push_back(KokkosComm::isend(mode, space, yp1_s, get_rank(rx, yp1), 2, comm)); - reqs.push_back(KokkosComm::isend(mode, space, ym1_s, get_rank(rx, ym1), 3, comm)); + reqs.push_back(KokkosComm::isend(h, xp1_s, get_rank(xp1, ry), 0)); + reqs.push_back(KokkosComm::isend(h, xm1_s, get_rank(xm1, ry), 1)); + reqs.push_back(KokkosComm::isend(h, yp1_s, get_rank(rx, yp1), 2)); + reqs.push_back(KokkosComm::isend(h, ym1_s, get_rank(rx, ym1), 3)); - KokkosComm::recv(space, xm1_r, get_rank(xm1, ry), 0, comm); - KokkosComm::recv(space, xp1_r, get_rank(xp1, ry), 1, comm); - KokkosComm::recv(space, ym1_r, get_rank(rx, ym1), 2, comm); - KokkosComm::recv(space, yp1_r, get_rank(rx, yp1), 3, comm); + KokkosComm::mpi::recv(h.space(), xm1_r, get_rank(xm1, ry), 0, h.mpi_comm()); + KokkosComm::mpi::recv(h.space(), xp1_r, get_rank(xp1, ry), 1, h.mpi_comm()); + KokkosComm::mpi::recv(h.space(), ym1_r, get_rank(rx, ym1), 2, h.mpi_comm()); + KokkosComm::mpi::recv(h.space(), yp1_r, get_rank(rx, yp1), 3, h.mpi_comm()); // wait for comm - for (KokkosComm::Req &req : reqs) { - req.wait(); - } + KokkosComm::wait_all(reqs); } void benchmark_2dhalo(benchmark::State &state) { diff --git a/perf_tests/test_main.cpp b/perf_tests/test_main.cpp index cbfc7d2a..4a78cab0 100644 --- a/perf_tests/test_main.cpp +++ b/perf_tests/test_main.cpp @@ -14,7 +14,7 @@ // //@HEADER -#include "impl/KokkosComm_include_mpi.hpp" +#include "mpi/impl/KokkosComm_include_mpi.hpp" #include #include diff --git a/perf_tests/test_osu_latency_isendirecv.cpp b/perf_tests/test_osu_latency_isendirecv.cpp index dd026528..a62c8acc 100644 --- a/perf_tests/test_osu_latency_isendirecv.cpp +++ b/perf_tests/test_osu_latency_isendirecv.cpp @@ -21,15 +21,15 @@ #include "test_utils.hpp" #include "KokkosComm.hpp" -template -void osu_latency_Kokkos_Comm_isendirecv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, - int rank, const View &v) { +template +void osu_latency_Kokkos_Comm_isendirecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, + const View &v) { + KokkosComm::Handle<> h{space, comm}; + if (rank == 0) { - KokkosComm::Req sendreq = KokkosComm::isend(mode, space, v, 1, 1, comm); - sendreq.wait(); + KokkosComm::wait(KokkosComm::isend(h, v, 1, 1)); } else if (rank == 1) { - KokkosComm::Req recvreq = KokkosComm::irecv(v, 0, 1, comm); - recvreq.wait(); + KokkosComm::wait(KokkosComm::irecv(h, v, 0, 1)); } } diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency_sendrecv.cpp index e38d6057..7c93f896 100644 --- a/perf_tests/test_osu_latency_sendrecv.cpp +++ b/perf_tests/test_osu_latency_sendrecv.cpp @@ -25,9 +25,9 @@ template void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int rank, const View &v) { if (rank == 0) { - KokkosComm::send(mode, space, v, 1, 0, comm); + KokkosComm::mpi::send(space, v, 1, 0, comm); } else if (rank == 1) { - KokkosComm::recv(space, v, 0, 0, comm); + KokkosComm::mpi::recv(space, v, 0, 0, comm); } } diff --git a/perf_tests/test_sendrecv.cpp b/perf_tests/test_sendrecv.cpp index b138df24..ae39a747 100644 --- a/perf_tests/test_sendrecv.cpp +++ b/perf_tests/test_sendrecv.cpp @@ -21,11 +21,11 @@ template void send_recv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int rank, const View &v) { if (0 == rank) { - KokkosComm::send(mode, space, v, 1, 0, comm); - KokkosComm::recv(space, v, 1, 0, comm); + KokkosComm::mpi::send(space, v, 1, 0, comm); + KokkosComm::mpi::recv(space, v, 1, 0, comm); } else if (1 == rank) { - KokkosComm::recv(space, v, 0, 0, comm); - KokkosComm::send(mode, space, v, 0, 0, comm); + KokkosComm::mpi::recv(space, v, 0, 0, comm); + KokkosComm::mpi::send(space, v, 0, 0, comm); } } diff --git a/perf_tests/test_utils.hpp b/perf_tests/test_utils.hpp index 1f3dddd9..5df3b826 100644 --- a/perf_tests/test_utils.hpp +++ b/perf_tests/test_utils.hpp @@ -20,7 +20,7 @@ #include -#include "impl/KokkosComm_include_mpi.hpp" +#include "mpi/impl/KokkosComm_include_mpi.hpp" // F is a function that takes (state, MPI_Comm, args...) template diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index 15c1b7e1..c5aee687 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -16,26 +16,31 @@ #pragma once -#include "KokkosComm_config.hpp" -#include "KokkosComm_collective.hpp" -#include "impl/KokkosComm_isend.hpp" -#include "impl/KokkosComm_irecv.hpp" -#include "impl/KokkosComm_recv.hpp" -#include "impl/KokkosComm_send.hpp" -#include "impl/KokkosComm_alltoall.hpp" -#include "impl/KokkosComm_barrier.hpp" -#include "impl/KokkosComm_concepts.hpp" -#include "KokkosComm_comm_modes.hpp" +#include "KokkosComm_fwd.hpp" -#include +// transport declarations +// TODO: could probably be moved to a per-transport file to be included +#if defined(KOKKOSCOMM_TRANSPORT_MPI) +#include "mpi/KokkosComm_mpi.hpp" +#include "mpi/KokkosComm_mpi_send.hpp" +#include "mpi/KokkosComm_mpi_allgather.hpp" +#include "mpi/KokkosComm_mpi_alltoall.hpp" +#include "mpi/KokkosComm_mpi_barrier.hpp" +#include "mpi/KokkosComm_mpi_handle.hpp" +#include "mpi/KokkosComm_mpi_irecv.hpp" +#include "mpi/KokkosComm_mpi_isend.hpp" +#include "mpi/KokkosComm_mpi_recv.hpp" +#include "mpi/KokkosComm_mpi_reduce.hpp" +#else +#error at least one transport must be defined +#endif -namespace KokkosComm { +#include "KokkosComm_version.hpp" -using Impl::alltoall; -using Impl::barrier; -using Impl::irecv; -using Impl::isend; -using Impl::recv; -using Impl::send; +#include "KokkosComm_concepts.hpp" +#include "KokkosComm_point_to_point.hpp" +#include "KokkosComm_collective.hpp" + +#include -} // namespace KokkosComm +namespace KokkosComm {} // namespace KokkosComm diff --git a/src/KokkosComm_collective.hpp b/src/KokkosComm_collective.hpp index a99ca83e..f4d9942b 100644 --- a/src/KokkosComm_collective.hpp +++ b/src/KokkosComm_collective.hpp @@ -16,23 +16,18 @@ #pragma once +#include + #include -#include "impl/KokkosComm_concepts.hpp" -#include "impl/KokkosComm_alltoall.hpp" -#include "impl/KokkosComm_reduce.hpp" -#include "impl/KokkosComm_allgather.hpp" +#include "KokkosComm_fwd.hpp" +#include "KokkosComm_concepts.hpp" namespace KokkosComm { -template -void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) { - return Impl::reduce(space, sv, rv, op, root, comm); -} - -template -void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) { - return Impl::allgather(space, sv, rv, comm); +template +void barrier(Handle &&h) { + Impl::Barrier{std::forward>(h)}; } } // namespace KokkosComm diff --git a/src/impl/KokkosComm_concepts.hpp b/src/KokkosComm_concepts.hpp similarity index 63% rename from src/impl/KokkosComm_concepts.hpp rename to src/KokkosComm_concepts.hpp index 8a2e2bce..acf5c1c0 100644 --- a/src/impl/KokkosComm_concepts.hpp +++ b/src/KokkosComm_concepts.hpp @@ -16,34 +16,25 @@ #pragma once -#include "KokkosComm_comm_modes.hpp" +#include #include namespace KokkosComm { +namespace Impl { +// fallback - most types are not a KokkosComm transport template -concept KokkosView = Kokkos::is_view_v; +struct is_transport : public std::false_type {}; +} // namespace Impl template -concept KokkosExecutionSpace = Kokkos::is_execution_space_v; - -template -struct is_communication_mode : std::false_type {}; - -template <> -struct is_communication_mode : std::true_type {}; - -template <> -struct is_communication_mode : std::true_type {}; - -template <> -struct is_communication_mode : std::true_type {}; +concept KokkosView = Kokkos::is_view_v; template -inline constexpr bool is_communication_mode_v = is_communication_mode::value; +concept KokkosExecutionSpace = Kokkos::is_execution_space_v; template -concept CommunicationMode = KokkosComm::is_communication_mode_v; +concept Transport = KokkosComm::Impl::is_transport::value; } // namespace KokkosComm diff --git a/src/KokkosComm_config.hpp b/src/KokkosComm_config.hpp new file mode 100644 index 00000000..a399a8df --- /dev/null +++ b/src/KokkosComm_config.hpp @@ -0,0 +1,19 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#define KOKKOSCOMM_TRANSPORT_MPI // TODO: set through CMake diff --git a/src/KokkosComm_fwd.hpp b/src/KokkosComm_fwd.hpp new file mode 100644 index 00000000..e3da5a1a --- /dev/null +++ b/src/KokkosComm_fwd.hpp @@ -0,0 +1,52 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "KokkosComm_concepts.hpp" +#include "KokkosComm_config.hpp" + +namespace KokkosComm { +#if defined(KOKKOSCOMM_TRANSPORT_MPI) +class Mpi; +using DefaultTransport = Mpi; +using FallbackTransport = Mpi; +#else +#error at least one transport must be defined +#endif + +template +class Req; + +template +class Handle; + +namespace Impl { + +template +struct Irecv; +template +struct Isend; +template +struct Barrier; + +} // namespace Impl + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/KokkosComm_point_to_point.hpp b/src/KokkosComm_point_to_point.hpp new file mode 100644 index 00000000..65aefa66 --- /dev/null +++ b/src/KokkosComm_point_to_point.hpp @@ -0,0 +1,50 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "KokkosComm_fwd.hpp" +#include "KokkosComm_concepts.hpp" + +namespace KokkosComm { + +template +Req irecv(Handle &h, RecvView &rv, int src, int tag) { + return Impl::Irecv::execute(h, rv, src, tag); +} + +template +Req irecv(RecvView &rv, int src, int tag) { + return irecv(Handle{}, rv, src, tag); +} + +template +Req isend(Handle &h, SendView &sv, int dest, int tag) { + return Impl::Isend::execute(h, sv, dest, tag); +} + +template +Req isend(SendView &sv, int dest, int tag) { + return isend(Handle{}, sv, dest, tag); +} + +} // namespace KokkosComm diff --git a/src/KokkosComm_request.hpp b/src/KokkosComm_request.hpp deleted file mode 100644 index 1e3d57ae..00000000 --- a/src/KokkosComm_request.hpp +++ /dev/null @@ -1,67 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#include -#include - -#include "impl/KokkosComm_include_mpi.hpp" - -namespace KokkosComm { - -class Req { - // a type-erased view. Request uses these to keep temporary views alive for - // the lifetime of "Immediate" MPI operations - struct ViewHolderBase { - virtual ~ViewHolderBase() {} - }; - template - struct ViewHolder : ViewHolderBase { - ViewHolder(const V &v) : v_(v) {} - V v_; - }; - - struct Record { - Record() : req_(MPI_REQUEST_NULL) {} - MPI_Request req_; - std::vector> until_waits_; - }; - - public: - Req() : record_(std::make_shared()) {} - - MPI_Request &mpi_req() { return record_->req_; } - - void wait() { - MPI_Wait(&(record_->req_), MPI_STATUS_IGNORE); - record_->until_waits_.clear(); // drop any views we're keeping alive until wait() - } - - // keep a reference to this view around until wait() is called - template - void keep_until_wait(const View &v) { - // unmanaged views don't own the underlying buffer, so no need to extend lifetime - if (v.use_count() != 0) { - record_->until_waits_.push_back(std::make_shared>(v)); - } - } - - private: - std::shared_ptr record_; -}; - -} // namespace KokkosComm diff --git a/src/impl/KokkosComm_contiguous.hpp b/src/impl/KokkosComm_contiguous.hpp new file mode 100644 index 00000000..65ff9cfd --- /dev/null +++ b/src/impl/KokkosComm_contiguous.hpp @@ -0,0 +1,63 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include + +#include "KokkosComm_concepts.hpp" +#include "KokkosComm_traits.hpp" + +namespace KokkosComm::Impl { + +template +struct contiguous_view { + using type = Kokkos::View; +}; + +template +using contiguous_view_t = contiguous_view::type; + +template +auto allocate_contiguous_for(const Space &space, const std::string &label, View &v) { + using non_const_packed_view_type = contiguous_view_t; + + if constexpr (KokkosComm::rank() == 1) { + return non_const_packed_view_type(Kokkos::view_alloc(space, Kokkos::WithoutInitializing, label), v.extent(0)); + } else if constexpr (KokkosComm::rank() == 2) { + return non_const_packed_view_type(Kokkos::view_alloc(space, Kokkos::WithoutInitializing, label), v.extent(0), + v.extent(1)); + } else { + static_assert(std::is_void_v, "allocate_contiguous_for for views > rank 2 not implemented"); + } +} + +template +auto resize_contiguous_for(const Space &space, DstView &out, const SrcView &in) { + static_assert(DstView::rank == SrcView::rank, ""); + + if constexpr (KokkosComm::rank() == 1) { + Kokkos::realloc(Kokkos::view_alloc(space, Kokkos::WithoutInitializing), out, in.extent(0)); + } else if constexpr (KokkosComm::rank() == 2) { + Kokkos::realloc(Kokkos::view_alloc(space, Kokkos::WithoutInitializing), out, in.extent(0), in.extent(1)); + } else { + static_assert(std::is_void_v, "realloc_contiguous_for for views > rank 2 not implemented"); + } +} + +} // namespace KokkosComm::Impl \ No newline at end of file diff --git a/src/impl/KokkosComm_include_mpi.hpp b/src/impl/KokkosComm_include_mpi.hpp deleted file mode 100644 index c955521a..00000000 --- a/src/impl/KokkosComm_include_mpi.hpp +++ /dev/null @@ -1,12 +0,0 @@ -#pragma once - -#define KOKKOSCOMM_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) - -#if KOKKOSCOMM_GCC_VERSION >= 11400 -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wcast-function-type" -#include -#pragma GCC diagnostic pop -#else -#include -#endif \ No newline at end of file diff --git a/src/impl/KokkosComm_irecv.hpp b/src/impl/KokkosComm_irecv.hpp deleted file mode 100644 index 91c36bb0..00000000 --- a/src/impl/KokkosComm_irecv.hpp +++ /dev/null @@ -1,55 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#include - -#include - -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_request.hpp" -#include "KokkosComm_traits.hpp" - -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { - -// low-level API -template -void irecv(RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Request &req) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); - - if (KokkosComm::is_contiguous(rv)) { - using RecvScalar = typename RecvView::value_type; - MPI_Irecv(KokkosComm::data_handle(rv), KokkosComm::span(rv), mpi_type_v, src, tag, comm, &req); - } else { - throw std::runtime_error("Only contiguous irecv viewsupported"); - } - - Kokkos::Tools::popRegion(); -} - -template -KokkosComm::Req irecv(RecvView &rv, int src, int tag, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::irecv"); - KokkosComm::Req req; - irecv(rv, src, tag, comm, req.mpi_req()); - return req; -} - -} // namespace KokkosComm::Impl diff --git a/src/impl/KokkosComm_isend.hpp b/src/impl/KokkosComm_isend.hpp deleted file mode 100644 index ec742fb6..00000000 --- a/src/impl/KokkosComm_isend.hpp +++ /dev/null @@ -1,96 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#include - -#include -#include - -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_request.hpp" -#include "KokkosComm_traits.hpp" -#include "KokkosComm_comm_modes.hpp" - -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { - -template -void isend(const SendView &sv, int dest, int tag, MPI_Comm comm, MPI_Request &req) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::isend"); - using KCT = typename KokkosComm::Traits; - - if (KokkosComm::is_contiguous(sv)) { - using SendScalar = typename SendView::non_const_value_type; - MPI_Isend(KokkosComm::data_handle(sv), KokkosComm::span(sv), mpi_type_v, dest, tag, comm, &req); - } else { - throw std::runtime_error("only contiguous views supported for low-level isend"); - } - Kokkos::Tools::popRegion(); -} - -template -Req isend(const SendMode &, const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::isend"); - - KokkosComm::Req req; - - using KCT = KokkosComm::Traits; - using KCPT = KokkosComm::PackTraits; - - auto mpi_isend_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, - MPI_Comm mpi_comm, MPI_Request *mpi_req) { - if constexpr (std::is_same_v) { - MPI_Isend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } else if constexpr (std::is_same_v) { - MPI_Irsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } else if constexpr (std::is_same_v) { - MPI_Issend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } - }; - - if (KCPT::needs_pack(sv)) { - using Packer = typename KCPT::packer_type; - using MpiArgs = typename Packer::args_type; - - MpiArgs args = Packer::pack(space, sv); - space.fence(); - mpi_isend_fn(KokkosComm::data_handle(args.view), args.count, args.datatype, dest, tag, comm, &req.mpi_req()); - req.keep_until_wait(args.view); - } else { - using SendScalar = typename SendView::value_type; - space.fence(); // can't issue isend until work in space is complete - mpi_isend_fn(KokkosComm::data_handle(sv), KokkosComm::span(sv), mpi_type_v, dest, tag, comm, - &req.mpi_req()); - if (KokkosComm::is_reference_counted()) { - req.keep_until_wait(sv); - } - } - - Kokkos::Tools::popRegion(); - return req; -} - -template -Req isend(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { - return isend(KokkosComm::DefaultCommMode(), space, sv, dest, tag, comm); -} - -} // namespace KokkosComm::Impl diff --git a/src/mpi/KokkosComm_mpi.hpp b/src/mpi/KokkosComm_mpi.hpp new file mode 100644 index 00000000..67a06a48 --- /dev/null +++ b/src/mpi/KokkosComm_mpi.hpp @@ -0,0 +1,48 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include + +#include "KokkosComm_concepts.hpp" +#include "impl/KokkosComm_include_mpi.hpp" + +namespace KokkosComm { + +// TODO: not sure what members this thing needs +struct Mpi { + // TODO: just an example + static int world_size() { + int size; + MPI_Comm_size(MPI_COMM_WORLD, &size); + return size; + } + + // TODO: just an example + static int world_rank() { + int rank; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + return rank; + } + +}; // struct Mpi + +// KokkosComm::Mpi is a KokkosComm::Transport +template <> +struct Impl::is_transport : public std::true_type {}; + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/impl/KokkosComm_allgather.hpp b/src/mpi/KokkosComm_mpi_allgather.hpp similarity index 74% rename from src/impl/KokkosComm_allgather.hpp rename to src/mpi/KokkosComm_mpi_allgather.hpp index c9f089d5..f248d058 100644 --- a/src/impl/KokkosComm_allgather.hpp +++ b/src/mpi/KokkosComm_mpi_allgather.hpp @@ -18,18 +18,16 @@ #include -#include "KokkosComm_pack_traits.hpp" #include "KokkosComm_traits.hpp" +#include "impl/KokkosComm_pack_traits.hpp" +#include "impl/KokkosComm_include_mpi.hpp" +#include "impl/KokkosComm_types.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" -#include "KokkosComm_types.hpp" - -namespace KokkosComm::Impl { +namespace KokkosComm::mpi { template void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::allgather"); + Kokkos::Tools::pushRegion("KokkosComm::Mpi::allgather"); using SendScalar = typename SendView::value_type; using RecvScalar = typename RecvView::value_type; @@ -44,8 +42,8 @@ void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) { throw std::runtime_error("low-level allgather requires contiguous recv view"); } const int count = KokkosComm::span(sv); // all ranks send/recv same count - MPI_Allgather(KokkosComm::data_handle(sv), count, mpi_type_v, KokkosComm::data_handle(rv), count, - mpi_type_v, comm); + MPI_Allgather(KokkosComm::data_handle(sv), count, KokkosComm::Impl::mpi_type_v, + KokkosComm::data_handle(rv), count, KokkosComm::Impl::mpi_type_v, comm); Kokkos::Tools::popRegion(); } @@ -53,7 +51,7 @@ void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) { // in-place allgather template void allgather(const RecvView &rv, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::allgather"); + Kokkos::Tools::pushRegion("KokkosComm::Mpi::allgather"); using RT = KokkosComm::Traits; using RecvScalar = typename RecvView::value_type; @@ -63,24 +61,25 @@ void allgather(const RecvView &rv, MPI_Comm comm) { if (!RT::is_contiguous(rv)) { throw std::runtime_error("low-level allgather requires contiguous recv view"); } - MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, RT::data_handle(rv), RT::span(rv), mpi_type_v, comm); + MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, RT::data_handle(rv), RT::span(rv), + KokkosComm::Impl::mpi_type_v, comm); Kokkos::Tools::popRegion(); } template void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::allgather"); + Kokkos::Tools::pushRegion("KokkosComm::Mpi::allgather"); using SPT = KokkosComm::PackTraits; using RPT = KokkosComm::PackTraits; - if (SPT::needs_pack(sv) || RPT::needs_pack(rv)) { + if (!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv)) { throw std::runtime_error("allgather for non-contiguous views not implemented"); } else { - space.fence(); // work in space may have been used to produce send view data + space.fence("fence before allgather"); // work in space may have been used to produce send view data allgather(sv, rv, comm); } Kokkos::Tools::popRegion(); } -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/impl/KokkosComm_alltoall.hpp b/src/mpi/KokkosComm_mpi_alltoall.hpp similarity index 93% rename from src/impl/KokkosComm_alltoall.hpp rename to src/mpi/KokkosComm_mpi_alltoall.hpp index 1bc6aa93..a2bdf7d3 100644 --- a/src/impl/KokkosComm_alltoall.hpp +++ b/src/mpi/KokkosComm_mpi_alltoall.hpp @@ -36,12 +36,10 @@ #include -#include "KokkosComm_pack_traits.hpp" #include "KokkosComm_traits.hpp" - -// impl -#include "KokkosComm_include_mpi.hpp" -#include "KokkosComm_types.hpp" +#include "impl/KokkosComm_pack_traits.hpp" +#include "impl/KokkosComm_include_mpi.hpp" +#include "impl/KokkosComm_types.hpp" namespace KokkosComm::Impl { template @@ -58,7 +56,7 @@ void alltoall(const ExecSpace &space, const SendView &sv, const size_t sendCount // Make sure views are ready space.fence("KokkosComm::Impl::alltoall"); - if (KokkosComm::PackTraits::needs_pack(sv) || KokkosComm::PackTraits::needs_pack(rv)) { + if (!KokkosComm::is_contiguous(sv) || !KokkosComm::is_contiguous(rv)) { throw std::runtime_error("alltoall for non-contiguous views not implemented"); } else { int size; @@ -96,7 +94,7 @@ void alltoall(const ExecSpace &space, const RecvView &rv, const size_t recvCount // Make sure views are ready space.fence("KokkosComm::Impl::alltoall"); - if (KokkosComm::PackTraits::needs_pack(rv)) { + if (!KokkosComm::is_contiguous(rv)) { throw std::runtime_error("alltoall for non-contiguous views not implemented"); } else { int size; diff --git a/src/impl/KokkosComm_barrier.hpp b/src/mpi/KokkosComm_mpi_barrier.hpp similarity index 61% rename from src/impl/KokkosComm_barrier.hpp rename to src/mpi/KokkosComm_mpi_barrier.hpp index 44580086..ef3ec5ec 100644 --- a/src/impl/KokkosComm_barrier.hpp +++ b/src/mpi/KokkosComm_mpi_barrier.hpp @@ -16,26 +16,26 @@ #pragma once -#include - #include "KokkosComm_concepts.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" +namespace KokkosComm { -namespace KokkosComm::Impl { +namespace Impl { +template +struct Barrier { + Barrier(Handle &&h) { + h.space().fence("KokkosComm::Impl::Barrier"); + MPI_Barrier(h.mpi_comm()); + } +}; +} // namespace Impl +namespace mpi { inline void barrier(MPI_Comm comm) { - Kokkos::Tools::pushRegion("KokkosComm::Impl::barrier"); + Kokkos::Tools::pushRegion("KokkosComm::mpi::barrier"); MPI_Barrier(comm); Kokkos::Tools::popRegion(); } +} // namespace mpi -// a barrier in the provided space. For MPI, we have to fence the space and do a host barrier -template -void barrier(const ExecSpace &space, MPI_Comm comm) { - space.fence("KokkosComm::Impl::barrier"); - barrier(comm); -} - -} // namespace KokkosComm::Impl +} // namespace KokkosComm diff --git a/src/mpi/KokkosComm_mpi_commmode.hpp b/src/mpi/KokkosComm_mpi_commmode.hpp new file mode 100644 index 00000000..489bfe01 --- /dev/null +++ b/src/mpi/KokkosComm_mpi_commmode.hpp @@ -0,0 +1,41 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +namespace KokkosComm::mpi { +// Scoped enumeration to specify the communication mode of a sending operation. +// See section 3.4 of the MPI standard for a complete specification. +enum class CommMode { + // Default mode: lets the user override the send operations behavior at + // compile-time. E.g., this can be set to mode "Synchronous" for debug + // builds by defining KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE. + Default, + // Standard mode: MPI implementation decides whether outgoing messages will + // be buffered. Send operations can be started whether or not a matching + // receive has been started. They may complete before a matching receive is + // started. Standard mode is non-local: successful completion of the send + // operation may depend on the occurrence of a matching receive. + Standard, + // Ready mode: Send operations may be started only if the matching receive is + // already started. + Ready, + // Synchronous mode: Send operations complete successfully only if a matching + // receive is started, and the receive operation has started to receive the + // message sent. + Synchronous, +}; +} // namespace KokkosComm::mpi \ No newline at end of file diff --git a/src/mpi/KokkosComm_mpi_handle.hpp b/src/mpi/KokkosComm_mpi_handle.hpp new file mode 100644 index 00000000..b4633da4 --- /dev/null +++ b/src/mpi/KokkosComm_mpi_handle.hpp @@ -0,0 +1,65 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "KokkosComm_fwd.hpp" + +#include "KokkosComm_mpi_req.hpp" + +namespace KokkosComm { + +/* +- init_fence +- allocations +- pre_copies +- pre_comm_fence +- comm +- wait +- post-wait +*/ +template +class Handle { + public: + using execution_space = ExecSpace; + using transport_type = Mpi; + using size_type = int; + + explicit Handle(const execution_space &space, MPI_Comm comm) : space_(space), comm_(comm) {} + explicit Handle(MPI_Comm comm) : Handle(Kokkos::DefaultExecutionSpace{}, comm) {} + Handle() : Handle(Kokkos::DefaultExecutionSpace{}, MPI_COMM_WORLD) {} + + MPI_Comm &mpi_comm() { return comm_; } + const execution_space &space() const { return space_; } + + size_type size() { + size_type ret; + MPI_Comm_size(comm_, &ret); + return ret; + } + + size_type rank() { + size_type ret; + MPI_Comm_rank(comm_, &ret); + return ret; + } + + private: + execution_space space_; + MPI_Comm comm_; +}; + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/mpi/KokkosComm_mpi_irecv.hpp b/src/mpi/KokkosComm_mpi_irecv.hpp new file mode 100644 index 00000000..9f0a1cd1 --- /dev/null +++ b/src/mpi/KokkosComm_mpi_irecv.hpp @@ -0,0 +1,69 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "KokkosComm_mpi.hpp" + +namespace KokkosComm { + +namespace Impl { +// irecv implementation for Mpi +template +struct Irecv { + static Req execute(Handle &h, const RecvView &rv, int src, int tag) { + using KCT = KokkosComm::Traits; + using KCPT = KokkosComm::PackTraits; + using Packer = typename KCPT::packer_type; + using Args = typename Packer::args_type; + + const ExecSpace &space = h.space(); + + Req req; + if (KokkosComm::is_contiguous(rv)) { + space.fence("fence before irecv"); + MPI_Irecv(KokkosComm::data_handle(rv), 1, view_mpi_type(rv), src, tag, h.mpi_comm(), + &req.mpi_request()); // TODO: probably best to just use the scalar type + req.extend_view_lifetime(rv); + } else { + Args args = Packer::allocate_packed_for(space, "TODO", rv); + space.fence("fence before irecv"); + MPI_Irecv(args.view.data(), args.count, args.datatype, src, tag, h.mpi_comm(), &req.mpi_request()); + req.extend_view_lifetime(rv); + // implicitly extends args.view lifetime since lambda holds a copy + req.call_after_mpi_wait([=]() { Packer::unpack_into(space, rv, args.view); }); + } + return req; + } +}; +} // namespace Impl + +namespace mpi { +template +void irecv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Request &req) { + Kokkos::Tools::pushRegion("KokkosComm::mpi::irecv"); + + if (KokkosComm::is_contiguous(rv)) { + using RecvScalar = typename RecvView::non_const_value_type; + MPI_Irecv(KokkosComm::data_handle(rv), KokkosComm::span(rv), Impl::mpi_type_v, src, tag, comm, &req); + } else { + throw std::runtime_error("Only contiguous irecv viewsupported"); + } + Kokkos::Tools::popRegion(); +} +} // namespace mpi + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/mpi/KokkosComm_mpi_isend.hpp b/src/mpi/KokkosComm_mpi_isend.hpp new file mode 100644 index 00000000..deefbcf0 --- /dev/null +++ b/src/mpi/KokkosComm_mpi_isend.hpp @@ -0,0 +1,98 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include "KokkosComm_traits.hpp" + +#include "KokkosComm_mpi.hpp" +#include "impl/KokkosComm_types.hpp" +#include "KokkosComm_mpi_commmode.hpp" + +namespace KokkosComm { + +namespace Impl { + +template +Req isend_impl(Handle &h, const SendView &sv, int dest, int tag) { + auto mpi_isend_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, + MPI_Comm mpi_comm, MPI_Request *mpi_req) { + if constexpr (SendMode == mpi::CommMode::Standard) { + MPI_Isend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); + } else if constexpr (SendMode == mpi::CommMode::Ready) { + MPI_Irsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); + } else if constexpr (SendMode == mpi::CommMode::Synchronous) { + MPI_Issend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); + } else if constexpr (SendMode == mpi::CommMode::Default) { +#ifdef KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE + MPI_Issend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); +#else + MPI_Isend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); +#endif + } + }; + + Req req; + if (KokkosComm::is_contiguous(sv)) { + h.space().fence("fence before isend"); + mpi_isend_fn(KokkosComm::data_handle(sv), 1, view_mpi_type(sv), dest, tag, h.mpi_comm(), &req.mpi_request()); + req.extend_view_lifetime(sv); + } else { + using Packer = typename KokkosComm::PackTraits::packer_type; + using Args = typename Packer::args_type; + + Args args = Packer::pack(h.space(), sv); + h.space().fence("fence before isend"); + mpi_isend_fn(args.view.data(), args.count, args.datatype, dest, tag, h.mpi_comm(), &req.mpi_request()); + req.extend_view_lifetime(args.view); + req.extend_view_lifetime(sv); + } + return req; +} + +// Implementation of KokkosComm::Isend +template +struct Isend { + static Req execute(Handle &h, const SendView &sv, int dest, int tag) { + return isend_impl(h, sv, dest, tag); + } +}; + +} // namespace Impl + +namespace mpi { + +template +Req isend(Handle &h, const SendView &sv, int dest, int tag) { + return KokkosComm::Impl::isend_impl(h, sv, dest, tag); +} + +template +void isend(const SendView &sv, int dest, int tag, MPI_Comm comm, MPI_Request &req) { + Kokkos::Tools::pushRegion("KokkosComm::Impl::isend"); + + if (KokkosComm::is_contiguous(sv)) { + using SendScalar = typename SendView::non_const_value_type; + MPI_Isend(KokkosComm::data_handle(sv), KokkosComm::span(sv), Impl::mpi_type_v, dest, tag, comm, &req); + } else { + throw std::runtime_error("only contiguous views supported for low-level isend"); + } + Kokkos::Tools::popRegion(); +} + +} // namespace mpi + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/impl/KokkosComm_recv.hpp b/src/mpi/KokkosComm_mpi_recv.hpp similarity index 88% rename from src/impl/KokkosComm_recv.hpp rename to src/mpi/KokkosComm_mpi_recv.hpp index f3d2039e..1c5b26fd 100644 --- a/src/impl/KokkosComm_recv.hpp +++ b/src/mpi/KokkosComm_mpi_recv.hpp @@ -19,13 +19,11 @@ #include #include "KokkosComm_concepts.hpp" -#include "KokkosComm_pack_traits.hpp" #include "KokkosComm_traits.hpp" +#include "impl/KokkosComm_pack_traits.hpp" +#include "impl/KokkosComm_include_mpi.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { +namespace KokkosComm::mpi { template void recv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Status *status) { @@ -34,7 +32,8 @@ void recv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Status *statu if (KokkosComm::is_contiguous(rv)) { using ScalarType = typename RecvView::non_const_value_type; - MPI_Recv(KokkosComm::data_handle(rv), KokkosComm::span(rv), mpi_type_v, src, tag, comm, status); + MPI_Recv(KokkosComm::data_handle(rv), KokkosComm::span(rv), KokkosComm::Impl::mpi_type_v, src, tag, + comm, status); } else { throw std::runtime_error("only contiguous views supported for low-level recv"); } @@ -48,7 +47,7 @@ void recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) using KCT = KokkosComm::Traits; using KCPT = KokkosComm::PackTraits; - if (KCPT::needs_unpack(rv)) { + if (!KokkosComm::is_contiguous(rv)) { using Packer = typename KCPT::packer_type; using Args = typename Packer::args_type; @@ -64,4 +63,4 @@ void recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) Kokkos::Tools::popRegion(); } -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/impl/KokkosComm_reduce.hpp b/src/mpi/KokkosComm_mpi_reduce.hpp similarity index 79% rename from src/impl/KokkosComm_reduce.hpp rename to src/mpi/KokkosComm_mpi_reduce.hpp index 9c28acc6..1bc95ad5 100644 --- a/src/impl/KokkosComm_reduce.hpp +++ b/src/mpi/KokkosComm_mpi_reduce.hpp @@ -18,14 +18,12 @@ #include -#include "KokkosComm_pack_traits.hpp" #include "KokkosComm_traits.hpp" +#include "impl/KokkosComm_pack_traits.hpp" +#include "impl/KokkosComm_include_mpi.hpp" +#include "impl/KokkosComm_types.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" -#include "KokkosComm_types.hpp" - -namespace KokkosComm::Impl { +namespace KokkosComm::mpi { template void reduce(const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) { @@ -35,7 +33,8 @@ void reduce(const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Com if (SPT::is_contiguous(sv) && RPT::is_contiguous(rv)) { using SendScalar = typename SendView::non_const_value_type; - MPI_Reduce(SPT::data_handle(sv), RPT::data_handle(rv), SPT::span(sv), mpi_type_v, op, root, comm); + MPI_Reduce(SPT::data_handle(sv), RPT::data_handle(rv), SPT::span(sv), KokkosComm::Impl::mpi_type_v, op, + root, comm); } else { throw std::runtime_error("only contiguous views supported for low-level reduce"); } @@ -55,10 +54,10 @@ void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_ using SendPacker = typename KokkosComm::PackTraits::packer_type; using RecvPacker = typename KokkosComm::PackTraits::packer_type; - if (KokkosComm::PackTraits::needs_pack(sv)) { + if (!KokkosComm::is_contiguous(sv)) { auto sendArgs = SendPacker::pack(space, sv); space.fence(); - if ((root == rank) && KokkosComm::PackTraits::needs_unpack(rv)) { + if ((root == rank) && !KokkosComm::is_contiguous(rv)) { auto recvArgs = RecvPacker::allocate_packed_for(space, "reduce recv", rv); space.fence(); MPI_Reduce(sendArgs.view.data(), recvArgs.view.data(), sendArgs.count, sendArgs.datatype, op, root, comm); @@ -69,17 +68,17 @@ void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_ } } else { using SendScalar = typename SendView::value_type; - if ((root == rank) && KokkosComm::PackTraits::needs_unpack(rv)) { + if ((root == rank) && !KokkosComm::is_contiguous(rv)) { auto recvArgs = RecvPacker::allocate_packed_for(space, "reduce recv", rv); space.fence(); - MPI_Reduce(sv.data(), recvArgs.view.data(), sv.span(), mpi_type_v, op, root, comm); + MPI_Reduce(sv.data(), recvArgs.view.data(), sv.span(), KokkosComm::Impl::mpi_type_v, op, root, comm); RecvPacker::unpack_into(space, rv, recvArgs.view); } else { space.fence(); - MPI_Reduce(sv.data(), rv.data(), sv.span(), mpi_type_v, op, root, comm); + MPI_Reduce(sv.data(), rv.data(), sv.span(), KokkosComm::Impl::mpi_type_v, op, root, comm); } } Kokkos::Tools::popRegion(); } -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/mpi/KokkosComm_mpi_req.hpp b/src/mpi/KokkosComm_mpi_req.hpp new file mode 100644 index 00000000..898aacf4 --- /dev/null +++ b/src/mpi/KokkosComm_mpi_req.hpp @@ -0,0 +1,96 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#include +#include +#include + +#include "KokkosComm_fwd.hpp" + +#include "KokkosComm_mpi.hpp" + +namespace KokkosComm { + +template <> +class Req { + // a type-erased view. Request uses these to keep temporary views alive for + // the lifetime of "Immediate" MPI operations + struct ViewHolderBase { + virtual ~ViewHolderBase() {} + }; + template + struct ViewHolder : ViewHolderBase { + ViewHolder(const V &v) : v_(v) {} + V v_; + }; + + struct Record { + Record() : req_(MPI_REQUEST_NULL) {} + MPI_Request req_; + std::vector> postWaits_; + }; + + public: + Req() : record_(std::make_shared()) {} + + MPI_Request &mpi_request() { return record_->req_; } + + // keep a reference to this view around until wait() is called + // TODO: no-op if unmanaged + template + void extend_view_lifetime(const View &v) { + // capture a copy of v into a no-op lambda + record_->postWaits_.push_back([v]() {} /* capture v into a no-op lambda that does nothing*/); + } + + void call_after_mpi_wait(std::function &&f) { record_->postWaits_.push_back(f); } + + private: + std::shared_ptr record_; + + friend void wait(Req req); + friend void wait_all(std::vector> &reqs); + friend int wait_any(std::vector> &reqs); +}; + +inline void wait(Req req) { + MPI_Wait(&req.mpi_request(), MPI_STATUS_IGNORE); + for (auto &f : req.record_->postWaits_) { + f(); + } + req.record_->postWaits_.clear(); +} + +inline void wait_all(std::vector> &reqs) { + for (Req &req : reqs) { + wait(req); + } +} + +inline int wait_any(std::vector> &reqs) { + for (size_t i = 0; i < reqs.size(); ++i) { + int completed; + MPI_Test(&(reqs[i].mpi_request()), &completed, MPI_STATUS_IGNORE); + if (completed) { + return true; + } + } + return false; +} + +} // namespace KokkosComm \ No newline at end of file diff --git a/src/impl/KokkosComm_send.hpp b/src/mpi/KokkosComm_mpi_send.hpp similarity index 81% rename from src/impl/KokkosComm_send.hpp rename to src/mpi/KokkosComm_mpi_send.hpp index 2ea5dfe6..e335ced1 100644 --- a/src/impl/KokkosComm_send.hpp +++ b/src/mpi/KokkosComm_mpi_send.hpp @@ -18,14 +18,11 @@ #include -#include "KokkosComm_pack_traits.hpp" -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_comm_modes.hpp" +#include "KokkosComm_mpi_commmode.hpp" +#include "impl/KokkosComm_pack_traits.hpp" +#include "impl/KokkosComm_include_mpi.hpp" -// impl -#include "KokkosComm_include_mpi.hpp" - -namespace KokkosComm::Impl { +namespace KokkosComm::mpi { template void send(const SendMode &, const SendView &sv, int dest, int tag, MPI_Comm comm) { @@ -45,7 +42,8 @@ void send(const SendMode &, const SendView &sv, int dest, int tag, MPI_Comm comm if (KokkosComm::is_contiguous(sv)) { using SendScalar = typename SendView::non_const_value_type; - mpi_send_fn(KokkosComm::data_handle(sv), KokkosComm::span(sv), mpi_type_v, dest, tag, comm); + MPI_Send(KokkosComm::data_handle(sv), KokkosComm::span(sv), KokkosComm::Impl::mpi_type_v, dest, tag, + comm); } else { throw std::runtime_error("only contiguous views supported for low-level send"); } @@ -74,21 +72,16 @@ void send(const SendMode &, const ExecSpace &space, const SendView &sv, int dest } }; - if (KokkosComm::PackTraits::needs_pack(sv)) { + if (KokkosComm::is_contiguous(sv)) { + using SendScalar = typename SendView::value_type; + mpi_send_fn(sv.data(), sv.span(), KokkosComm::Impl::mpi_type_v, dest, tag, comm); + } else { auto args = Packer::pack(space, sv); space.fence(); mpi_send_fn(args.view.data(), args.count, args.datatype, dest, tag, comm); - } else { - using SendScalar = typename SendView::value_type; - mpi_send_fn(sv.data(), sv.span(), mpi_type_v, dest, tag, comm); } Kokkos::Tools::popRegion(); } -template -void send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { - send(KokkosComm::DefaultCommMode(), space, sv, dest, tag, comm); -} - -} // namespace KokkosComm::Impl +} // namespace KokkosComm::mpi diff --git a/src/mpi/impl/KokkosComm_include_mpi.hpp b/src/mpi/impl/KokkosComm_include_mpi.hpp new file mode 100644 index 00000000..1219c04e --- /dev/null +++ b/src/mpi/impl/KokkosComm_include_mpi.hpp @@ -0,0 +1,28 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#pragma once + +#define KOKKOSCOMM_GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) + +#if KOKKOSCOMM_GCC_VERSION >= 11400 +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wcast-function-type" +#include +#pragma GCC diagnostic pop +#else +#include +#endif \ No newline at end of file diff --git a/src/KokkosComm_pack_traits.hpp b/src/mpi/impl/KokkosComm_pack_traits.hpp similarity index 76% rename from src/KokkosComm_pack_traits.hpp rename to src/mpi/impl/KokkosComm_pack_traits.hpp index 39a74eac..553b217f 100644 --- a/src/KokkosComm_pack_traits.hpp +++ b/src/mpi/impl/KokkosComm_pack_traits.hpp @@ -21,24 +21,17 @@ #include "impl/KokkosComm_concepts.hpp" #include "impl/KokkosComm_packer.hpp" -/*! \brief Defines a common interface for packing and unpacking - Kokkos::View-like types \file KokkosComm_traits.hpp -*/ - namespace KokkosComm { template struct PackTraits { - static_assert(std::is_void_v, "KokkosComm::PackTraits not specialized for type"); + static_assert(std::is_void_v, "KokkosComm::PackTraits not specialized for requested type"); }; /*! \brief This can be specialized to do custom behavior for a particular view*/ template struct PackTraits { using packer_type = Impl::Packer::DeepCopy; - - static bool needs_unpack(const View &v) { return !KokkosComm::is_contiguous(v); } - static bool needs_pack(const View &v) { return !KokkosComm::is_contiguous(v); } }; } // namespace KokkosComm diff --git a/src/impl/KokkosComm_packer.hpp b/src/mpi/impl/KokkosComm_packer.hpp similarity index 98% rename from src/impl/KokkosComm_packer.hpp rename to src/mpi/impl/KokkosComm_packer.hpp index 3429b1ed..56d43c88 100644 --- a/src/impl/KokkosComm_packer.hpp +++ b/src/mpi/impl/KokkosComm_packer.hpp @@ -21,6 +21,8 @@ #include "KokkosComm_types.hpp" #include "KokkosComm_include_mpi.hpp" +// todo: redo this using KokkosComm_contiguous + namespace KokkosComm::Impl { namespace Packer { diff --git a/src/impl/KokkosComm_types.hpp b/src/mpi/impl/KokkosComm_types.hpp similarity index 75% rename from src/impl/KokkosComm_types.hpp rename to src/mpi/impl/KokkosComm_types.hpp index 548f21de..4e46ce70 100644 --- a/src/impl/KokkosComm_types.hpp +++ b/src/mpi/impl/KokkosComm_types.hpp @@ -107,4 +107,43 @@ MPI_Datatype mpi_type() { template inline MPI_Datatype mpi_type_v = mpi_type(); + +template +MPI_Datatype view_mpi_type(const View &view) { +#define USE_CACHE + +#if defined(USE_CACHE) + using Key = std::array; + static std::map cache; + + Key key; + for (size_t d = 0; d < View::rank; d++) { + key[2 * d] = view.extent(d); + key[2 * d + 1] = view.stride(d); + } + if (cache.count(key) > 0) { + return cache[key]; + } +#endif + + using value_type = typename View::non_const_value_type; + MPI_Datatype type = mpi_type_v; + + // This doesn't work for 1D contiguous views into reduce because it + // represents the whole 1D view as 1 Hvector, rather than N elements. + // FIXME: is there a more generic way to handle this, maybe by treating + // the last dimension specially under certain circumstances? + for (size_t d = 0; d < KokkosComm::rank(); ++d) { + MPI_Datatype newtype; + MPI_Type_create_hvector(KokkosComm::extent(view, d) /*count*/, 1 /*block length*/, + KokkosComm::stride(view, d) * sizeof(value_type), type, &newtype); + type = newtype; + } + MPI_Type_commit(&type); +#if defined(USE_CACHE) + cache[key] = type; +#endif + return type; +} + }; // namespace KokkosComm::Impl diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index 93ea170f..f0df264c 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -31,27 +31,44 @@ if (NOT googletest_POPULATED) endif() # Standalone MPI smoke tests (do not use KokkosComm) -add_executable(test-mpi test_mpi.cpp) -add_test(NAME test-mpi-1 - COMMAND mpirun -np 1 ./test-mpi -) -add_test(NAME test-mpi-2 - COMMAND mpirun -np 2 ./test-mpi -) -# doesn't use KokkosComm, so explicitly link MPI -target_link_libraries(test-mpi MPI::MPI_CXX) + # Kokkos Comm tests -add_executable(test-main test_main.cpp - test_gtest_mpi.cpp +set(KOKKOSCOMM_TEST_SOURCES) +list(APPEND KOKKOSCOMM_TEST_SOURCES test_main.cpp test_isendirecv.cpp - test_isendrecv.cpp - test_sendrecv.cpp test_barrier.cpp - test_alltoall.cpp - test_reduce.cpp - test_allgather.cpp + +) +if(KOKKOSCOMM_ENABLE_MPI) + + add_executable(test-mpi mpi/test_mpi.cpp) + add_test(NAME test-mpi-1 + COMMAND mpirun -np 1 ./test-mpi + ) + add_test(NAME test-mpi-2 + COMMAND mpirun -np 2 ./test-mpi + ) + # doesn't use KokkosComm, so explicitly link MPI + target_link_libraries(test-mpi MPI::MPI_CXX) + + + list(PREPEND KOKKOSCOMM_TEST_SOURCES + mpi/test_gtest_mpi.cpp + ) + list(APPEND KOKKOSCOMM_TEST_SOURCES + mpi/test_sendrecv.cpp + mpi/test_allgather.cpp + mpi/test_alltoall.cpp + mpi/test_isendrecv.cpp + mpi/test_alltoall.cpp + mpi/test_reduce.cpp + mpi/test_allgather.cpp + ) +endif() + add_executable(test-main ${KOKKOSCOMM_TEST_SOURCES} ) + target_link_libraries(test-main KokkosComm::KokkosComm gtest) if(KOKKOSCOMM_ENABLE_TESTS) kokkoscomm_add_cxx_flags(TARGET test-main) diff --git a/unit_tests/test_allgather.cpp b/unit_tests/mpi/test_allgather.cpp similarity index 92% rename from unit_tests/test_allgather.cpp rename to unit_tests/mpi/test_allgather.cpp index 8671039c..18e0cedb 100644 --- a/unit_tests/test_allgather.cpp +++ b/unit_tests/mpi/test_allgather.cpp @@ -42,7 +42,7 @@ void test_allgather_0d() { Kokkos::parallel_for( sv.extent(0), KOKKOS_LAMBDA(const int) { sv() = rank; }); - KokkosComm::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); + KokkosComm::mpi::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); int errs; Kokkos::parallel_reduce( @@ -67,7 +67,7 @@ void test_allgather_1d_contig() { Kokkos::parallel_for( sv.extent(0), KOKKOS_LAMBDA(const int i) { sv(i) = rank + i; }); - KokkosComm::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); + KokkosComm::mpi::allgather(Kokkos::DefaultExecutionSpace(), sv, rv, MPI_COMM_WORLD); int errs; Kokkos::parallel_reduce( diff --git a/unit_tests/test_alltoall.cpp b/unit_tests/mpi/test_alltoall.cpp similarity index 100% rename from unit_tests/test_alltoall.cpp rename to unit_tests/mpi/test_alltoall.cpp diff --git a/unit_tests/test_gtest_mpi.cpp b/unit_tests/mpi/test_gtest_mpi.cpp similarity index 100% rename from unit_tests/test_gtest_mpi.cpp rename to unit_tests/mpi/test_gtest_mpi.cpp diff --git a/unit_tests/test_isendrecv.cpp b/unit_tests/mpi/test_isendrecv.cpp similarity index 59% rename from unit_tests/test_isendrecv.cpp rename to unit_tests/mpi/test_isendrecv.cpp index 40e6995f..f2ffbbf4 100644 --- a/unit_tests/test_isendrecv.cpp +++ b/unit_tests/mpi/test_isendrecv.cpp @@ -31,29 +31,28 @@ using ScalarTypes = ::testing::Types, Kokkos::complex, int, unsigned, int64_t, size_t>; TYPED_TEST_SUITE(IsendRecv, ScalarTypes); -template +template void isend_comm_mode_1d_contig() { - if constexpr (std::is_same_v) { + if (IsendMode == KokkosComm::mpi::CommMode::Ready) { GTEST_SKIP() << "Skipping test for ready-mode send"; } Kokkos::View a("a", 1000); - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; } - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::isend(IsendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD).wait(); - } else if (1 == rank) { + KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0); + KokkosComm::wait(req); + } else if (1 == h.rank()) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::mpi::recv(h.space(), a, src, 0, h.mpi_comm()); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); @@ -61,9 +60,9 @@ void isend_comm_mode_1d_contig() { } } -template +template void isend_comm_mode_1d_noncontig() { - if constexpr (std::is_same_v) { + if (IsendMode == KokkosComm::mpi::CommMode::Ready) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -71,17 +70,20 @@ void isend_comm_mode_1d_noncontig() { Kokkos::View b("a", 10, 10); auto a = Kokkos::subview(b, Kokkos::ALL, 2); // take column 2 (non-contiguous) - int rank; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; + } - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::isend(IsendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD).wait(); - } else if (1 == rank) { + KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0); + KokkosComm::wait(req); + } else if (1 == h.rank()) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::mpi::recv(h.space(), a, src, 0, h.mpi_comm()); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); @@ -90,27 +92,27 @@ void isend_comm_mode_1d_noncontig() { } TYPED_TEST(IsendRecv, 1D_contig_standard) { - isend_comm_mode_1d_contig(); + isend_comm_mode_1d_contig(); } TYPED_TEST(IsendRecv, 1D_contig_ready) { - isend_comm_mode_1d_contig(); + isend_comm_mode_1d_contig(); } TYPED_TEST(IsendRecv, 1D_contig_synchronous) { - isend_comm_mode_1d_contig(); + isend_comm_mode_1d_contig(); } TYPED_TEST(IsendRecv, 1D_noncontig_standard) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } TYPED_TEST(IsendRecv, 1D_noncontig_ready) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } TYPED_TEST(IsendRecv, 1D_noncontig_synchronous) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } } // namespace diff --git a/unit_tests/test_mpi.cpp b/unit_tests/mpi/test_mpi.cpp similarity index 100% rename from unit_tests/test_mpi.cpp rename to unit_tests/mpi/test_mpi.cpp diff --git a/unit_tests/test_reduce.cpp b/unit_tests/mpi/test_reduce.cpp similarity index 94% rename from unit_tests/test_reduce.cpp rename to unit_tests/mpi/test_reduce.cpp index a8d33efe..6eafcd48 100644 --- a/unit_tests/test_reduce.cpp +++ b/unit_tests/mpi/test_reduce.cpp @@ -50,7 +50,7 @@ void test_reduce_1d_contig() { Kokkos::parallel_for( sendv.extent(0), KOKKOS_LAMBDA(const int i) { sendv(i) = rank + i; }); - KokkosComm::reduce(Kokkos::DefaultExecutionSpace(), sendv, recvv, MPI_SUM, 0, MPI_COMM_WORLD); + KokkosComm::mpi::reduce(Kokkos::DefaultExecutionSpace(), sendv, recvv, MPI_SUM, 0, MPI_COMM_WORLD); if (0 == rank) { int errs; diff --git a/unit_tests/test_sendrecv.cpp b/unit_tests/mpi/test_sendrecv.cpp similarity index 68% rename from unit_tests/test_sendrecv.cpp rename to unit_tests/mpi/test_sendrecv.cpp index 04ceabd2..a88dec6f 100644 --- a/unit_tests/test_sendrecv.cpp +++ b/unit_tests/mpi/test_sendrecv.cpp @@ -30,9 +30,9 @@ class SendRecv : public testing::Test { using ScalarTypes = ::testing::Types, Kokkos::complex>; TYPED_TEST_SUITE(SendRecv, ScalarTypes); -template +template void send_comm_mode_1d_contig() { - if constexpr (std::is_same_v) { + if (SendMode == KokkosComm::mpi::CommMode::Ready) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -49,10 +49,10 @@ void send_comm_mode_1d_contig() { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::send(SendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); + KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); } else if (1 == rank) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::mpi::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != i; }, errs); @@ -60,9 +60,9 @@ void send_comm_mode_1d_contig() { } } -template +template void send_comm_mode_1d_noncontig() { - if constexpr (std::is_same_v) { + if (SendMode == KokkosComm::mpi::CommMode::Ready) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -77,10 +77,10 @@ void send_comm_mode_1d_noncontig() { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::send(SendMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); + KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); } else if (1 == rank) { int src = 0; - KokkosComm::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); + KokkosComm::mpi::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != i; }, errs); @@ -89,27 +89,27 @@ void send_comm_mode_1d_noncontig() { } TYPED_TEST(SendRecv, 1D_contig_standard) { - send_comm_mode_1d_contig(); + send_comm_mode_1d_contig(); } TYPED_TEST(SendRecv, 1D_contig_ready) { - send_comm_mode_1d_contig(); + send_comm_mode_1d_contig(); } TYPED_TEST(SendRecv, 1D_contig_synchronous) { - send_comm_mode_1d_contig(); + send_comm_mode_1d_contig(); } TYPED_TEST(SendRecv, 1D_noncontig_standard) { - send_comm_mode_1d_noncontig(); + send_comm_mode_1d_noncontig(); } TYPED_TEST(SendRecv, 1D_noncontig_ready) { - send_comm_mode_1d_noncontig(); + send_comm_mode_1d_noncontig(); } TYPED_TEST(SendRecv, 1D_noncontig_synchronous) { - send_comm_mode_1d_noncontig(); + send_comm_mode_1d_noncontig(); } } // namespace diff --git a/unit_tests/test_barrier.cpp b/unit_tests/test_barrier.cpp index c9aa95ca..11c9c42e 100644 --- a/unit_tests/test_barrier.cpp +++ b/unit_tests/test_barrier.cpp @@ -20,11 +20,6 @@ namespace { -TEST(Barrier, 0) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - KokkosComm::barrier(Kokkos::DefaultExecutionSpace(), MPI_COMM_WORLD); -} +TEST(Barrier, 0) { KokkosComm::barrier(KokkosComm::Handle<>{}); } } // namespace diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_isendirecv.cpp index 0c0ada4a..3bbd3ecb 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_isendirecv.cpp @@ -17,7 +17,6 @@ #include #include "KokkosComm.hpp" -#include "impl/KokkosComm_irecv.hpp" #include "view_builder.hpp" @@ -38,25 +37,21 @@ void test_1d(const View1D &a) { static_assert(View1D::rank == 1, ""); using Scalar = typename View1D::non_const_value_type; - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; } - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::Req req = - KokkosComm::isend(KokkosComm::DefaultCommMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { - int src = 0; - MPI_Request req; - KokkosComm::irecv(a, src, 0, MPI_COMM_WORLD, req); - MPI_Wait(&req, MPI_STATUS_IGNORE); + KokkosComm::Req req = KokkosComm::isend(h, a, dst, 0); + KokkosComm::wait(req); + } else if (1 == h.rank()) { + int src = 0; + KokkosComm::Req req = KokkosComm::irecv(h, a, src, 0); + KokkosComm::wait(req); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); @@ -69,27 +64,24 @@ void test_2d(const View2D &a) { static_assert(View2D::rank == 2, ""); using Scalar = typename View2D::non_const_value_type; - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size < 2) { - GTEST_SKIP() << "Requires >= 2 ranks (" << size << " provided)"; + KokkosComm::Handle<> h; + if (h.size() < 2) { + GTEST_SKIP() << "Requires >= 2 ranks (" << h.size() << " provided)"; } using Policy = Kokkos::MDRangePolicy>; Policy policy({0, 0}, {a.extent(0), a.extent(1)}); - if (0 == rank) { + if (0 == h.rank()) { int dst = 1; Kokkos::parallel_for( policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); - KokkosComm::Req req = - KokkosComm::isend(KokkosComm::DefaultCommMode(), Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); - req.wait(); - } else if (1 == rank) { + KokkosComm::Req req = KokkosComm::isend(h, a, dst, 0); + KokkosComm::wait(req); + } else if (1 == h.rank()) { int src = 0; - KokkosComm::Req req = KokkosComm::irecv(a, src, 0, MPI_COMM_WORLD); - req.wait(); + KokkosComm::Req req = KokkosComm::irecv(h, a, src, 0); + KokkosComm::wait(req); int errs; Kokkos::parallel_reduce( policy, KOKKOS_LAMBDA(int i, int j, int &lsum) { lsum += a(i, j) != Scalar(i * a.extent(0) + j); }, errs); diff --git a/unit_tests/test_main.cpp b/unit_tests/test_main.cpp index 7e9b0b44..680e322b 100644 --- a/unit_tests/test_main.cpp +++ b/unit_tests/test_main.cpp @@ -23,7 +23,7 @@ #include #include -#include "impl/KokkosComm_include_mpi.hpp" +#include "mpi/impl/KokkosComm_include_mpi.hpp" class MpiEnvironment : public ::testing::Environment { public: From 08fff5bfddffd240c437d64f4134787eaba8ea98 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Jul 2024 11:49:35 -0600 Subject: [PATCH 02/18] docs --- docs/api/core.rst | 182 ++++++++++++-------------------- docs/api/core_irecv.cpp | 4 + docs/api/core_isend.cpp | 30 ++++++ docs/api/mpi.rst | 225 ++++++++++++++++++++++++++++++++++++++++ docs/index.rst | 1 + 5 files changed, 325 insertions(+), 117 deletions(-) create mode 100644 docs/api/core_irecv.cpp create mode 100644 docs/api/core_isend.cpp create mode 100644 docs/api/mpi.rst diff --git a/docs/api/core.rst b/docs/api/core.rst index f998ed91..af1c0574 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -1,162 +1,110 @@ Core ==== -.. list-table:: MPI API Support - :widths: 40 30 15 - :header-rows: 1 - - * - MPI - - ``KokkosComm::`` - - ``Kokkos::View`` - * - ``MPI_Send`` - - ``send`` or ``send(KokkosComm::DefaultCommMode{}, ...)`` - - ✓ - * - ``MPI_Rsend`` - - ``send(KokkosComm::ReadyCommMode{}, ...)`` - - ✓ - * - ``MPI_Recv`` - - ``recv`` - - ✓ - * - ``MPI_Ssend`` - - ``send(KokkosComm::SynchronousCommMode{}, ...)`` - - ✓ - * - ``MPI_Isend`` - - ``isend`` or ``isend(KokkosComm::DefaultCommMode{}, ...)`` - - ✓ - * - ``MPI_Irsend`` - - ``isend(KokkosComm::ReadyCommMode{}, ...)`` - - ✓ - * - ``MPI_Issend`` - - ``isend(KokkosComm::SynchronousCommMode{}, ...)`` - - ✓ - * - ``MPI_Reduce`` - - ``reduce`` - - ✓ - Point-to-point -------------- -.. cpp:function:: template \ - Req KokkosComm::isend(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) +.. cpp:namespace:: KokkosComm - Wrapper for ``MPI_Isend``, ``MPI_Irsend`` and ``MPI_Issend``. +.. cpp:function:: template Req isend(Handle &h, SendView &sv, int dest, int tag) - :param mode: The communication mode to use - :param space: The execution space to operate in - :param sv: The data to send - :param dest: the destination rank - :param tag: the MPI tag - :param comm: the MPI communicator - :tparam IsendMode: A communication mode to use, one of: ``KokkosComm::DefaultCommMode``, ``KokkosComm::StandardCommMode``, ``KokkosComm::SynchronousCommMode`` or ``KokkosComm::ReadyCommMode`` (modeled with the ``KokkosComm::CommunicationMode`` concept) - :tparam SendView: A Kokkos::View to send - :tparam ExecSpace: A Kokkos execution space to operate in - :returns: A KokkosComm::Req representing the asynchronous communication and any lifetime-extended views. + Initiates a non-blocking send operation. -.. cpp:function:: template \ - void KokkosComm::send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) + :tparam SendView: The type of the Kokkos view to send. + :tparam ExecSpace: The execution space to use. Defaults to Kokkos::DefaultExecutionSpace. + :tparam TRANSPORT: The transport mechanism to use. Defaults to DefaultTransport. - Wrapper for ``MPI_Send``, ``MPI_Rsend`` and ``MPI_Ssend``. + :param h: A handle to the execution space and transport mechanism. + :param sv: The Kokkos view to send. + :param dest: The destination rank. + :param tag: The message tag. - :param mode: The communication mode to use - :param space: The execution space to operate in - :param sv: The data to send - :param dest: the destination rank - :param tag: the MPI tag - :param comm: the MPI communicator - :tparam SendMode: A communication mode to use, one of: ``KokkosComm::DefaultCommMode``, ``KokkosComm::StandardCommMode``, ``KokkosComm::SynchronousCommMode`` or ``KokkosComm::ReadyCommMode`` (modeled with the ``KokkosComm::CommunicationMode`` concept) - :tparam SendView: A Kokkos::View to send - :tparam ExecSpace: A Kokkos execution space to operate in + :return: A request object for the non-blocking send operation. -.. cpp:function:: template \ - void KokkosComm::recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) +.. cpp:function:: template Req isend(SendView &sv, int dest, int tag) - MPI_Recv wrapper + Initiates a non-blocking send operation using a default handle. - :param space: The execution space to operate in - :param srv: The data to recv - :param src: the source rank - :param tag: the MPI tag - :param comm: the MPI communicator - :tparam Recv: A Kokkos::View to send - :tparam ExecSpace: A Kokkos execution space to operate in + :tparam SendView: The type of the Kokkos view to send. + :tparam ExecSpace: The execution space to use. Defaults to Kokkos::DefaultExecutionSpace. + :tparam TRANSPORT: The transport mechanism to use. Defaults to DefaultTransport. + :param sv: The Kokkos view to send. + :param dest: The destination rank. + :param tag: The message tag. -Collective ----------- + :return: A request object for the non-blocking send operation. -.. cpp:function:: template \ - void KokkosComm::reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + Example usage: - MPI_Reduce wrapper +.. literalinclude:: core_isend.cpp + :language: cpp - :param space: The execution space to operate in - :param sv: The data to send - :param rv: The view to receive into - :param op: The MPI_Op to use in the reduction - :param root: The root rank for the reduction - :param comm: the MPI communicator - :tparam SendView: A Kokkos::View to send - :tparam RecvView: A Kokkos::View to recv - :tparam ExecSpace: A Kokkos execution space to operate in -.. cpp:function:: template \ - void KokkosComm::allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) +.. cpp:function:: template Req irecv(Handle &h, RecvView &rv, int src, int tag) - MPI_Allgather wrapper + Initiates a non-blocking receive operation. - :param space: The execution space to operate in - :param sv: The data to send - :param rv: The view to receive into - :param comm: the MPI communicator - :tparam SendView: A Kokkos::View to send. Contiguous and rank less than 2. - :tparam RecvView: A Kokkos::View to recv. Contiguous and rank 1. - :tparam ExecSpace: A Kokkos execution space to operate in + :tparam RecvView: The type of the Kokkos view for receiving data. + :tparam ExecSpace: The execution space where the operation will be performed. Defaults to `Kokkos::DefaultExecutionSpace`. + :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. - If ``sv`` is a rank-0 view, the value from the jth rank will be placed in index j of ``rv``. + :param h: A handle to the execution space and transport mechanism. + :param rv: The Kokkos view where the received data will be stored. + :param src: The source rank from which to receive data. + :param tag: The message tag to identify the communication. -Related Types -------------- + :return: A request object of type `Req` representing the non-blocking receive operation. + + This function initiates a non-blocking receive operation using the specified execution space and transport mechanism. The data will be received into the provided view from the specified source rank and message tag. The function returns a request object that can be used to check the status of the receive operation or to wait for its completion. + + Example usage: -Communication Modes -^^^^^^^^^^^^^^^^^^^ +.. literalinclude:: core_irecv.cpp + :language: cpp -Structures to specify the mode of an operation. Buffered mode is not supported. -.. cpp:struct:: KokkosComm::StandardCommMode - Let the MPI implementation decide whether outgoing messages will be buffered. Send operations can be started whether or not a matching receive has been started. They may complete before a matching receive begins. Standard mode is non-local: successful completion of the send operation may depend on the occurrence of a matching receive. -.. cpp:struct:: KokkosComm::SynchronousCommMode - Send operations complete successfully only if a matching receive is started, and the receive operation has started to receive the message sent. +.. cpp:function:: template Req irecv(RecvView &rv, int src, int tag) -.. cpp:struct:: KokkosComm::ReadyCommMode + Initiates a non-blocking receive operation using a default handle. - Send operations may be started only if the matching receive is already started. + :tparam RecvView: The type of the Kokkos view for receiving data. + :tparam ExecSpace: The execution space where the operation will be performed. Defaults to `Kokkos::DefaultExecutionSpace`. + :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. -.. cpp:struct:: KokkosComm::DefaultCommMode + :param rv: The Kokkos view where the received data will be stored. + :param src: The source rank from which to receive data. + :param tag: The message tag to identify the communication. - The default mode is aliased as ``Standard`` but lets users override the behavior of operations at compile-time using the ``KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE`` pre-processor definition. The latter forces ``Synchronous`` mode for all "default-mode" operations, which can be helpful for debugging purposes, e.g., asserting that the communication scheme is correct. + :return: A request object of type `Req` representing the non-blocking receive operation. -Requests -^^^^^^^^ +Collective +---------- + +.. cpp:namespace:: KokkosComm + +.. cpp:function:: template void barrier(Handle &&h) -.. cpp:class:: KokkosComm::Req + A function to create a barrier using the given execution space and transport handle. - A wrapper around an MPI_Request that can also extend the lifetime of Views. + :tparam ExecSpace: The execution space to be used. Defaults to `Kokkos::DefaultExecutionSpace`. + :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. + :param h: A handle of type `Handle` to be forwarded to the barrier implementation. - .. cpp:function:: MPI_Request &KokkosComm::Req::mpi_req() - Retrieve a reference to the held MPI_Request. - .. cpp:function:: void KokkosComm::Req::wait() +Related Types +------------- + +.. cpp:namespace:: KokkosComm - Call MPI_Wait on the held MPI_Request and drop copies of any previous arguments to Req::keep_until_wait(). +.. cpp:class:: template Req - .. cpp:function:: template \ - void KokkosComm::Req::keep_until_wait(const View &v) + A template class to handle requests with different transport types. - Extend the lifetime of v at least until Req::wait() is called. - This is useful to prevent a View from being destroyed during an asynchronous MPI operation. + :tparam TRANSPORT: The type of transport. Defaults to :cpp:enumerator:`KokkosComm::DefaultTransport`. diff --git a/docs/api/core_irecv.cpp b/docs/api/core_irecv.cpp new file mode 100644 index 00000000..c561fac3 --- /dev/null +++ b/docs/api/core_irecv.cpp @@ -0,0 +1,4 @@ +Handle<> handle; +Kokkos::View recv_view("recv_view", 100); +auto req = irecv(handle, recv_view, 1/*src*/, 0/*tag*/); +KokkosComm::wait(req); \ No newline at end of file diff --git a/docs/api/core_isend.cpp b/docs/api/core_isend.cpp new file mode 100644 index 00000000..0dcc69bc --- /dev/null +++ b/docs/api/core_isend.cpp @@ -0,0 +1,30 @@ +#include + +// Define the execution space and transport +using ExecSpace = Kokkos::DefaultExecutionSpace; +using Transport = DefaultTransport; + +// Create a Kokkos view +Kokkos::View data("data", 100); + +// Fill the view with some data +Kokkos::parallel_for("fill_data", Kokkos::RangePolicy(0, 100), KOKKOS_LAMBDA(int i) { + data(i) = static_cast(i); +}); + +// Destination rank and message tag +int dest = 1; +int tag = 42; + +// Create a handle +KokkosComm::Handle<> handle; // Same as Handle + +// Initiate a non-blocking send with a handle +auto req1 = isend(handle, data, dest, tag); + +// Initiate a non-blocking send with a default handle +auto req2 = isend(data, dest, tag); + +// Wait for the requests to complete (assuming a wait function exists) +req1.wait(); +req2.wait(); \ No newline at end of file diff --git a/docs/api/mpi.rst b/docs/api/mpi.rst new file mode 100644 index 00000000..9f6b5748 --- /dev/null +++ b/docs/api/mpi.rst @@ -0,0 +1,225 @@ +MPI +==== + +.. list-table:: MPI API Support + :widths: 40 30 15 + :header-rows: 1 + + * - MPI + - ``KokkosComm::mpi::`` + - ``Kokkos::View`` + * - ``MPI_Send`` + - ``send`` or ``send`` + - ✓ + * - ``MPI_Rsend`` + - ``send`` + - ✓ + * - ``MPI_Recv`` + - ``recv`` + - ✓ + * - ``MPI_Ssend`` + - ``send`` + - ✓ + * - ``MPI_Isend`` + - ``isend`` or ``isend`` + - ✓ + * - ``MPI_Irsend`` + - ``isend`` + - ✓ + * - ``MPI_Issend`` + - ``isend`` + - ✓ + * - ``MPI_Reduce`` + - ``reduce`` + - ✓ + +Point-to-point +-------------- + +.. cpp:namespace:: KokkosComm::mpi + +.. cpp:function:: template Req isend(Handle &h, const SendView &sv, int dest, int tag) + + Initiates a non-blocking send operation. + + :tparam SendMode: The communication mode. + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :param h: The handle for the execution space and MPI. + :param sv: The view to be sent. + :param dest: The destination rank. + :param tag: The message tag. + :return: A request object for the non-blocking send operation. + +.. cpp:function:: template void irecv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Request &req) + + Initiates a non-blocking receive operation. + + :tparam RecvView: The type of the view to be received. + :param rv: The view to be received. + :param src: The source rank. + :param tag: The message tag. + :param comm: The MPI communicator. + :param req: The MPI request object for the non-blocking receive operation. + :throws std::runtime_error: If the view is not contiguous. + +.. cpp:function:: template void send(const SendView &sv, int dest, int tag, MPI_Comm comm) + + Initiates a blocking send operation. + + :tparam SendView: The type of the view to be sent. + :param sv: The view to be sent. + :param dest: The destination rank. + :param tag: The message tag. + :param comm: The MPI communicator. + +.. cpp:function:: template void send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) + + Initiates a blocking send operation with a specified execution space and communication mode. + + :tparam SendMode: The communication mode (default is CommMode::Default). + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :param space: The execution space. + :param sv: The view to be sent. + :param dest: The destination rank. + :param tag: The message tag. + :param comm: The MPI communicator. + +.. cpp:function:: template void recv(const RecvView &rv, int src, int tag, MPI_Comm comm, MPI_Status *status) + + Initiates a blocking receive operation. + + :tparam RecvView: The type of the view to be received. + :param rv: The view to be received. + :param src: The source rank. + :param tag: The message tag. + :param comm: The MPI communicator. + :param status: The MPI status object for the blocking receive operation. + +.. cpp:function:: template void recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) + + Initiates a blocking receive operation with a specified execution space. + + :tparam ExecSpace: The execution space. + :tparam RecvView: The type of the view to be received. + :param space: The execution space. + :param rv: The view to be received. + :param src: The source rank. + :param tag: The message tag. + :param comm: The MPI communicator. + + + +Collective +---------- + +.. cpp:function:: template \ + void KokkosComm::reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + + MPI_Reduce wrapper + + :param space: The execution space to operate in + :param sv: The data to send + :param rv: The view to receive into + :param op: The MPI_Op to use in the reduction + :param root: The root rank for the reduction + :param comm: the MPI communicator + :tparam SendView: A Kokkos::View to send + :tparam RecvView: A Kokkos::View to recv + :tparam ExecSpace: A Kokkos execution space to operate in + +.. cpp:function:: template void allgather(const SendView &sv, const RecvView &rv, MPI_Comm comm) + + Performs an allgather operation, gathering data from all processes and distributing it to all processes. + + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param sv: The view to be sent. + :param rv: The view to be received. + :param comm: The MPI communicator. + + If ``sv`` is a rank-0 view, the value from the jth rank will be placed in index j of ``rv``. + +.. cpp:function:: template void allgather(const RecvView &rv, MPI_Comm comm) + + Performs an in-place allgather operation, gathering data from all processes and distributing it to all processes. + + :tparam RecvView: The type of the view to be received. + :param rv: The view to be received. + :param comm: The MPI communicator. + +.. cpp:function:: template void allgather(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Comm comm) + + Performs an allgather operation with a specified execution space, gathering data from all processes and distributing it to all processes. + + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param space: The execution space. + :param sv: The view to be sent. + :param rv: The view to be received. + :param comm: The MPI communicator. + +.. cpp:function:: inline void barrier(MPI_Comm comm) + + Blocks until all processes in the communicator have reached this routine. + + :param comm: The MPI communicator. + + +.. cpp:function:: template void reduce(const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + + Performs a reduction operation, combining data from all processes and distributing the result to the root process. + + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param sv: The view to be sent. + :param rv: The view to be received. + :param op: The MPI operation to be applied. + :param root: The rank of the root process. + :param comm: The MPI communicator. + +.. cpp:function:: template void reduce(const ExecSpace &space, const SendView &sv, const RecvView &rv, MPI_Op op, int root, MPI_Comm comm) + + Performs a reduction operation with a specified execution space, combining data from all processes and distributing the result to the root process. + + :tparam ExecSpace: The execution space. + :tparam SendView: The type of the view to be sent. + :tparam RecvView: The type of the view to be received. + :param space: The execution space. + :param sv: The view to be sent. + :param rv: The view to be received. + :param op: The MPI operation to be applied. + :param root: The rank of the root process. + :param comm: The MPI communicator. + + +Related Types +------------- + +.. cpp:namespace:: KokkosComm::mpi + +.. _CommMode: + +.. cpp:enum-class:: CommMode + + A scoped enum to specify the mode of an operation. Buffered mode is not supported. + + .. cpp:enumerator:: Standard + + Standard mode: the MPI implementation decides whether outgoing messages will be buffered. Send operations can be started whether or not a matching receive has been started. They may complete before a matching receive is started. Standard mode is non-local: successful completion of the send operation may depend on the occurrence of a matching receive. + + .. cpp:enumerator:: Ready + + Ready mode: Send operations may be started only if the matching receive is already started. + + .. cpp:enumerator:: Synchronous + + Synchronous mode: Send operations complete successfully only if a matching receive is started, and the receive operation has started to receive the message sent. + + .. cpp:enumerator:: Default + + Default mode is an alias for ``Standard`` mode, but lets users override the behavior of operations at compile-time using the ``KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE`` pre-processor define. This forces ``Synchronous`` mode for all "default-mode" operations, which can be useful for debugging purposes, e.g., for asserting that the communication scheme is correct. + + diff --git a/docs/index.rst b/docs/index.rst index fed07349..c18762fd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -19,6 +19,7 @@ API Reference api/core api/traits api/packing + api/mpi Design ------ From a3d87d0b97be977045cd6529e4115bf606a6a2be Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Jul 2024 12:22:43 -0600 Subject: [PATCH 03/18] Suggest improved build parallelism, move aliases together --- docs/dev/testing.rst | 4 ++-- src/mpi/KokkosComm_mpi_recv.hpp | 9 ++++----- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/dev/testing.rst b/docs/dev/testing.rst index 7d8a6783..af1dfe46 100644 --- a/docs/dev/testing.rst +++ b/docs/dev/testing.rst @@ -53,7 +53,7 @@ Testing the Install cmake -S "$COMM_SRC"/unit_tests -B "$COMM_UNIT_TESTS_BUILD" -DKokkos_ROOT="$KOKKOS_INSTALL" -DKokkosComm_ROOT="$COMM_INSTALL" -DCMAKE_BUILD_TYPE=RelWithDebInfo echo "==== BUILD UNIT TESTS ====" - VERBOSE=1 cmake --build "$COMM_UNIT_TESTS_BUILD" --parallel 4 + VERBOSE=1 cmake --build "$COMM_UNIT_TESTS_BUILD" --parallel $(nproc) echo "==== RUN UNIT TESTS ====" ctest -V --test-dir "$COMM_UNIT_TESTS_BUILD" @@ -63,7 +63,7 @@ Testing the Install cmake -S "$COMM_SRC"/perf_tests -B "$COMM_PERF_TESTS_BUILD" -DKokkos_ROOT="$KOKKOS_INSTALL" -DKokkosComm_ROOT="$COMM_INSTALL" -DCMAKE_BUILD_TYPE=RelWithDebInfo echo "==== BUILD PERF TESTS ====" - VERBOSE=1 cmake --build "$COMM_PERF_TESTS_BUILD" --parallel 4 + VERBOSE=1 cmake --build "$COMM_PERF_TESTS_BUILD" --parallel $(nproc) echo "==== RUN PERF TESTS ====" ctest -V --test-dir "$COMM_PERF_TESTS_BUILD" diff --git a/src/mpi/KokkosComm_mpi_recv.hpp b/src/mpi/KokkosComm_mpi_recv.hpp index 1c5b26fd..d04d9625 100644 --- a/src/mpi/KokkosComm_mpi_recv.hpp +++ b/src/mpi/KokkosComm_mpi_recv.hpp @@ -44,13 +44,12 @@ template void recv(const ExecSpace &space, RecvView &rv, int src, int tag, MPI_Comm comm) { Kokkos::Tools::pushRegion("KokkosComm::Impl::recv"); - using KCT = KokkosComm::Traits; - using KCPT = KokkosComm::PackTraits; + using KCT = KokkosComm::Traits; + using KCPT = KokkosComm::PackTraits; + using Packer = typename KCPT::packer_type; + using Args = typename Packer::args_type; if (!KokkosComm::is_contiguous(rv)) { - using Packer = typename KCPT::packer_type; - using Args = typename Packer::args_type; - Args args = Packer::allocate_packed_for(space, "packed", rv); space.fence(); // make sure allocation is complete before recv MPI_Recv(KokkosComm::data_handle(args.view), args.count, args.datatype, src, tag, comm, MPI_STATUS_IGNORE); From 55c9eaeada70c5914f4c8bc9225d2fcd017f3301 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Jul 2024 15:35:02 -0600 Subject: [PATCH 04/18] docs: suggest cwpearson/clang-format-14 --- docs/CONTRIBUTING.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/CONTRIBUTING.rst b/docs/CONTRIBUTING.rst index 8385aafa..41ea33f7 100644 --- a/docs/CONTRIBUTING.rst +++ b/docs/CONTRIBUTING.rst @@ -27,7 +27,7 @@ Alternatively, you can use docker/podman: (expects $PWD to be the kokkos-comm tr .. code-block:: bash shopt -s globstar - podman run -v $PWD:/src xianpengshen/clang-tools:14 clang-format -i {src,unit_tests,perf_tests}/**/*.[ch]pp + podman run --rm -v ${PWD}:/src ghcr.io/cwpearson/clang-format-14 clang-format -i {src,unit_tests,perf_tests}/**/*.[ch]pp Behavioral Expectations From 9de9d7aedd1487a750529d93aed14c8f52603222 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Jul 2024 15:50:04 -0600 Subject: [PATCH 05/18] ci: displayed name is PR name --- .github/workflows/docs-deploy.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/docs-deploy.yml b/.github/workflows/docs-deploy.yml index afeda954..e002d1d5 100644 --- a/.github/workflows/docs-deploy.yml +++ b/.github/workflows/docs-deploy.yml @@ -3,7 +3,6 @@ # To get started with mdBook see: https://rust-lang.github.io/mdBook/index.html # name: Deploy sphinx site to Pages -run-name: Deploy sphinx site to Pages on: # Runs on pushes targeting the default branch From 838781db706b73219ebf0b94f1ed507e9194dbb9 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 10 Jul 2024 15:59:59 -0600 Subject: [PATCH 06/18] test_2dhalo.cpp: use provided space --- perf_tests/test_2dhalo.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/test_2dhalo.cpp index 8c7562b8..c6ffb8c0 100644 --- a/perf_tests/test_2dhalo.cpp +++ b/perf_tests/test_2dhalo.cpp @@ -25,7 +25,7 @@ void noop(benchmark::State, MPI_Comm) {} template void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int nx, int ny, int rx, int ry, int rs, const View &v) { - KokkosComm::Handle<> h{comm}; + KokkosComm::Handle<> h{space, comm}; // 2D index of nbrs in minus and plus direction (periodic) const int xm1 = (rx + rs - 1) % rs; From cfa191142a0fea7b6d1d3e7f13a0ed07482b7e8c Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 11 Jul 2024 08:44:24 -0600 Subject: [PATCH 07/18] Increase test timeout --- .github/workflows/linux.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux.yaml b/.github/workflows/linux.yaml index ef29f8fb..2d058ef8 100644 --- a/.github/workflows/linux.yaml +++ b/.github/workflows/linux.yaml @@ -62,7 +62,7 @@ jobs: COMM_SRC: ${{ github.workspace }} COMM_BUILD: ${{ github.workspace }}/build runs-on: ubuntu-latest - timeout-minutes: 5 + timeout-minutes: 10 steps: - name: Install MPI run: | From 8e2010c0622cf2243bfc60053df11378b7d0786c Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 11 Jul 2024 08:54:38 -0600 Subject: [PATCH 08/18] Req: don't extend lifetime of unmanaged views --- src/mpi/KokkosComm_mpi_req.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/mpi/KokkosComm_mpi_req.hpp b/src/mpi/KokkosComm_mpi_req.hpp index 898aacf4..1e027b2e 100644 --- a/src/mpi/KokkosComm_mpi_req.hpp +++ b/src/mpi/KokkosComm_mpi_req.hpp @@ -51,11 +51,12 @@ class Req { MPI_Request &mpi_request() { return record_->req_; } // keep a reference to this view around until wait() is called - // TODO: no-op if unmanaged template void extend_view_lifetime(const View &v) { - // capture a copy of v into a no-op lambda - record_->postWaits_.push_back([v]() {} /* capture v into a no-op lambda that does nothing*/); + // unmanaged views don't own the underlying buffer, so no need to extend lifetime + if (v.use_count() != 0) { + record_->postWaits_.push_back([v]() {}); + } } void call_after_mpi_wait(std::function &&f) { record_->postWaits_.push_back(f); } From ace266bc3c134127abf223e91566a7ec229bef68 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 11 Jul 2024 09:03:56 -0600 Subject: [PATCH 09/18] KokkosComm_version -> KokkosComm_config --- cmake/KokkosComm_config.hpp.in | 2 ++ src/CMakeLists.txt | 1 + src/KokkosComm.hpp | 4 +--- src/KokkosComm_config.hpp | 19 ------------------- src/KokkosComm_fwd.hpp | 2 +- 5 files changed, 5 insertions(+), 23 deletions(-) delete mode 100644 src/KokkosComm_config.hpp diff --git a/cmake/KokkosComm_config.hpp.in b/cmake/KokkosComm_config.hpp.in index 9664a347..81bf9a1c 100644 --- a/cmake/KokkosComm_config.hpp.in +++ b/cmake/KokkosComm_config.hpp.in @@ -19,3 +19,5 @@ #define KOKKOSCOMM_VERSION_MAJOR @KOKKOSCOMM_VERSION_MAJOR@ #define KOKKOSCOMM_VERSION_MINOR @KOKKOSCOMM_VERSION_MINOR@ #define KOKKOSCOMM_VERSION_PATCH @KOKKOSCOMM_VERSION_PATCH@ + +#cmakedefine KOKKOSCOMM_ENABLE_MPI diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 46d64cb6..138f7e34 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -9,3 +9,4 @@ target_include_directories(KokkosComm INTERFACE $ ) target_link_libraries(KokkosComm INTERFACE MPI::MPI_CXX Kokkos::kokkos) + diff --git a/src/KokkosComm.hpp b/src/KokkosComm.hpp index c5aee687..cdb33a43 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm.hpp @@ -20,7 +20,7 @@ // transport declarations // TODO: could probably be moved to a per-transport file to be included -#if defined(KOKKOSCOMM_TRANSPORT_MPI) +#if defined(KOKKOSCOMM_ENABLE_MPI) #include "mpi/KokkosComm_mpi.hpp" #include "mpi/KokkosComm_mpi_send.hpp" #include "mpi/KokkosComm_mpi_allgather.hpp" @@ -35,8 +35,6 @@ #error at least one transport must be defined #endif -#include "KokkosComm_version.hpp" - #include "KokkosComm_concepts.hpp" #include "KokkosComm_point_to_point.hpp" #include "KokkosComm_collective.hpp" diff --git a/src/KokkosComm_config.hpp b/src/KokkosComm_config.hpp deleted file mode 100644 index a399a8df..00000000 --- a/src/KokkosComm_config.hpp +++ /dev/null @@ -1,19 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -#pragma once - -#define KOKKOSCOMM_TRANSPORT_MPI // TODO: set through CMake diff --git a/src/KokkosComm_fwd.hpp b/src/KokkosComm_fwd.hpp index e3da5a1a..55acea88 100644 --- a/src/KokkosComm_fwd.hpp +++ b/src/KokkosComm_fwd.hpp @@ -22,7 +22,7 @@ #include "KokkosComm_config.hpp" namespace KokkosComm { -#if defined(KOKKOSCOMM_TRANSPORT_MPI) +#if defined(KOKKOSCOMM_ENABLE_MPI) class Mpi; using DefaultTransport = Mpi; using FallbackTransport = Mpi; From 85cfa1069ee87cfd07fbaf1eea365a5e2d61b817 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 11 Jul 2024 11:45:54 -0600 Subject: [PATCH 10/18] isend->send, irecv->recv --- docs/api/core.rst | 24 +++-- docs/api/{core_irecv.cpp => core_recv.cpp} | 0 docs/api/{core_isend.cpp => core_send.cpp} | 8 +- perf_tests/CMakeLists.txt | 3 +- perf_tests/test_2dhalo.cpp | 18 ++-- ...ency_sendrecv.cpp => test_osu_latency.cpp} | 93 ++++++++++++++---- perf_tests/test_osu_latency_isendirecv.cpp | 96 ------------------- src/KokkosComm_fwd.hpp | 4 +- src/KokkosComm_point_to_point.hpp | 16 ++-- src/mpi/KokkosComm_mpi_irecv.hpp | 4 +- src/mpi/KokkosComm_mpi_isend.hpp | 4 +- unit_tests/CMakeLists.txt | 2 +- unit_tests/mpi/test_sendrecv.cpp | 16 ++-- ...{test_isendirecv.cpp => test_sendrecv.cpp} | 24 ++--- 14 files changed, 142 insertions(+), 170 deletions(-) rename docs/api/{core_irecv.cpp => core_recv.cpp} (100%) rename docs/api/{core_isend.cpp => core_send.cpp} (85%) rename perf_tests/{test_osu_latency_sendrecv.cpp => test_osu_latency.cpp} (52%) delete mode 100644 perf_tests/test_osu_latency_isendirecv.cpp rename unit_tests/{test_isendirecv.cpp => test_sendrecv.cpp} (81%) diff --git a/docs/api/core.rst b/docs/api/core.rst index af1c0574..e2fd6b88 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -6,10 +6,13 @@ Point-to-point .. cpp:namespace:: KokkosComm -.. cpp:function:: template Req isend(Handle &h, SendView &sv, int dest, int tag) +.. cpp:function:: template Req send(Handle &h, SendView &sv, int dest, int tag) Initiates a non-blocking send operation. + .. warning:: + This is not a blocking operation despite being named like ``MPI_Send``. + :tparam SendView: The type of the Kokkos view to send. :tparam ExecSpace: The execution space to use. Defaults to Kokkos::DefaultExecutionSpace. :tparam TRANSPORT: The transport mechanism to use. Defaults to DefaultTransport. @@ -21,10 +24,13 @@ Point-to-point :return: A request object for the non-blocking send operation. -.. cpp:function:: template Req isend(SendView &sv, int dest, int tag) +.. cpp:function:: template Req send(SendView &sv, int dest, int tag) Initiates a non-blocking send operation using a default handle. + .. warning:: + This is not a blocking operation despite being named like ``MPI_Send``. + :tparam SendView: The type of the Kokkos view to send. :tparam ExecSpace: The execution space to use. Defaults to Kokkos::DefaultExecutionSpace. :tparam TRANSPORT: The transport mechanism to use. Defaults to DefaultTransport. @@ -37,15 +43,18 @@ Point-to-point Example usage: -.. literalinclude:: core_isend.cpp +.. literalinclude:: core_send.cpp :language: cpp -.. cpp:function:: template Req irecv(Handle &h, RecvView &rv, int src, int tag) +.. cpp:function:: template Req recv(Handle &h, RecvView &rv, int src, int tag) Initiates a non-blocking receive operation. + .. warning:: + This is not a blocking operation despite being named like ``MPI_Recv``. + :tparam RecvView: The type of the Kokkos view for receiving data. :tparam ExecSpace: The execution space where the operation will be performed. Defaults to `Kokkos::DefaultExecutionSpace`. :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. @@ -61,17 +70,20 @@ Point-to-point Example usage: -.. literalinclude:: core_irecv.cpp +.. literalinclude:: core_recv.cpp :language: cpp -.. cpp:function:: template Req irecv(RecvView &rv, int src, int tag) +.. cpp:function:: template Req recv(RecvView &rv, int src, int tag) Initiates a non-blocking receive operation using a default handle. + .. warning:: + This is not a blocking operation despite being named like ``MPI_Recv``. + :tparam RecvView: The type of the Kokkos view for receiving data. :tparam ExecSpace: The execution space where the operation will be performed. Defaults to `Kokkos::DefaultExecutionSpace`. :tparam TRANSPORT: The transport mechanism to be used. Defaults to `DefaultTransport`. diff --git a/docs/api/core_irecv.cpp b/docs/api/core_recv.cpp similarity index 100% rename from docs/api/core_irecv.cpp rename to docs/api/core_recv.cpp diff --git a/docs/api/core_isend.cpp b/docs/api/core_send.cpp similarity index 85% rename from docs/api/core_isend.cpp rename to docs/api/core_send.cpp index 0dcc69bc..516c0797 100644 --- a/docs/api/core_isend.cpp +++ b/docs/api/core_send.cpp @@ -20,11 +20,11 @@ int tag = 42; KokkosComm::Handle<> handle; // Same as Handle // Initiate a non-blocking send with a handle -auto req1 = isend(handle, data, dest, tag); +auto req1 = send(handle, data, dest, tag); // Initiate a non-blocking send with a default handle -auto req2 = isend(data, dest, tag); +auto req2 = send(data, dest, tag); // Wait for the requests to complete (assuming a wait function exists) -req1.wait(); -req2.wait(); \ No newline at end of file +KokkosComm::wait(req1); +KokkosComm::wait(req2); \ No newline at end of file diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index 6940f1cc..a95ff234 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -37,8 +37,7 @@ if (KOKKOSCOMM_ENABLE_MPI) add_executable(perf_test-main test_main.cpp test_sendrecv.cpp test_2dhalo.cpp - test_osu_latency_sendrecv.cpp - test_osu_latency_isendirecv.cpp + test_osu_latency.cpp ) if(KOKKOSCOMM_ENABLE_TESTS) kokkoscomm_add_cxx_flags(TARGET perf_test-main) diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/test_2dhalo.cpp index c6ffb8c0..073eb683 100644 --- a/perf_tests/test_2dhalo.cpp +++ b/perf_tests/test_2dhalo.cpp @@ -50,15 +50,15 @@ void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int nx, in std::vector> reqs; // std::cerr << get_rank(rx, ry) << " -> " << get_rank(xp1, ry) << "\n"; - reqs.push_back(KokkosComm::isend(h, xp1_s, get_rank(xp1, ry), 0)); - reqs.push_back(KokkosComm::isend(h, xm1_s, get_rank(xm1, ry), 1)); - reqs.push_back(KokkosComm::isend(h, yp1_s, get_rank(rx, yp1), 2)); - reqs.push_back(KokkosComm::isend(h, ym1_s, get_rank(rx, ym1), 3)); - - KokkosComm::mpi::recv(h.space(), xm1_r, get_rank(xm1, ry), 0, h.mpi_comm()); - KokkosComm::mpi::recv(h.space(), xp1_r, get_rank(xp1, ry), 1, h.mpi_comm()); - KokkosComm::mpi::recv(h.space(), ym1_r, get_rank(rx, ym1), 2, h.mpi_comm()); - KokkosComm::mpi::recv(h.space(), yp1_r, get_rank(rx, yp1), 3, h.mpi_comm()); + reqs.push_back(KokkosComm::send(h, xp1_s, get_rank(xp1, ry), 0)); + reqs.push_back(KokkosComm::send(h, xm1_s, get_rank(xm1, ry), 1)); + reqs.push_back(KokkosComm::send(h, yp1_s, get_rank(rx, yp1), 2)); + reqs.push_back(KokkosComm::send(h, ym1_s, get_rank(rx, ym1), 3)); + + reqs.push_back(KokkosComm::recv(h, xm1_r, get_rank(xm1, ry), 0)); + reqs.push_back(KokkosComm::recv(h, xp1_r, get_rank(xp1, ry), 1)); + reqs.push_back(KokkosComm::recv(h, ym1_r, get_rank(rx, ym1), 2)); + reqs.push_back(KokkosComm::recv(h, yp1_r, get_rank(rx, yp1), 3)); // wait for comm KokkosComm::wait_all(reqs); diff --git a/perf_tests/test_osu_latency_sendrecv.cpp b/perf_tests/test_osu_latency.cpp similarity index 52% rename from perf_tests/test_osu_latency_sendrecv.cpp rename to perf_tests/test_osu_latency.cpp index 7c93f896..68a6a248 100644 --- a/perf_tests/test_osu_latency_sendrecv.cpp +++ b/perf_tests/test_osu_latency.cpp @@ -21,9 +21,33 @@ #include "test_utils.hpp" #include "KokkosComm.hpp" -template -void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int rank, - const View &v) { +template +void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm, KokkosComm::Handle<> &h, const View &v) { + if (h.rank() == 0) { + KokkosComm::wait(KokkosComm::send(h, v, 1, 1)); + } else if (h.rank() == 1) { + KokkosComm::wait(KokkosComm::recv(h, v, 0, 1)); + } +} + +void benchmark_osu_latency_KokkosComm_sendrecv(benchmark::State &state) { + KokkosComm::Handle<> h; + if (h.size() != 2) { + state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); + } + + using view_type = Kokkos::View; + view_type a("A", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, h.mpi_comm(), osu_latency_Kokkos_Comm_sendrecv, h, a); + } + state.counters["bytes"] = a.size() * 2; +} + +template +void osu_latency_Kokkos_Comm_mpi_sendrecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, + const View &v) { if (rank == 0) { KokkosComm::mpi::send(space, v, 1, 0, comm); } else if (rank == 1) { @@ -31,38 +55,65 @@ void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm comm, const M } } +void benchmark_osu_latency_Kokkos_Comm_mpi_sendrecv(benchmark::State &state) { + int rank, size; + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + if (size != 2) { + state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); + } + + auto mode = KokkosComm::DefaultCommMode(); + auto space = Kokkos::DefaultExecutionSpace(); + using view_type = Kokkos::View; + view_type a("A", state.range(0)); + + while (state.KeepRunning()) { + do_iteration(state, MPI_COMM_WORLD, osu_latency_Kokkos_Comm_mpi_sendrecv, + space, rank, a); + } + state.counters["bytes"] = a.size() * 2; +} + template -void osu_latency_MPI_sendrecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { +void osu_latency_MPI_isendirecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { + MPI_Request sendreq, recvreq; if (rank == 0) { - MPI_Recv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, - MPI_STATUS_IGNORE); + MPI_Irecv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, &recvreq); + MPI_Wait(&recvreq, MPI_STATUS_IGNORE); } else if (rank == 1) { - MPI_Send(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm); + MPI_Isend(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm, &sendreq); + MPI_Wait(&sendreq, MPI_STATUS_IGNORE); } } -void benchmark_osu_latency_KokkosComm_sendrecv(benchmark::State &state) { +void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) { int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); MPI_Comm_size(MPI_COMM_WORLD, &size); if (size != 2) { - state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); + state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); } - auto mode = KokkosComm::DefaultCommMode(); - auto space = Kokkos::DefaultExecutionSpace(); using view_type = Kokkos::View; view_type a("A", state.range(0)); while (state.KeepRunning()) { - do_iteration( - state, MPI_COMM_WORLD, - osu_latency_Kokkos_Comm_sendrecv, mode, - space, rank, a); + do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_isendirecv, rank, a); } state.counters["bytes"] = a.size() * 2; } +template +void osu_latency_MPI_sendrecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { + if (rank == 0) { + MPI_Recv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, + MPI_STATUS_IGNORE); + } else if (rank == 1) { + MPI_Send(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm); + } +} + void benchmark_osu_latency_MPI_sendrecv(benchmark::State &state) { int rank, size; MPI_Comm_rank(MPI_COMM_WORLD, &rank); @@ -85,8 +136,18 @@ BENCHMARK(benchmark_osu_latency_KokkosComm_sendrecv) ->Unit(benchmark::kMicrosecond) ->RangeMultiplier(8) ->Range(1, 1 << 28); -BENCHMARK(benchmark_osu_latency_MPI_sendrecv) +BENCHMARK(benchmark_osu_latency_Kokkos_Comm_mpi_sendrecv) ->UseManualTime() ->Unit(benchmark::kMicrosecond) ->RangeMultiplier(8) ->Range(1, 1 << 28); +BENCHMARK(benchmark_osu_latency_MPI_isendirecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(8) + ->Range(1, 1 << 28); +BENCHMARK(benchmark_osu_latency_MPI_sendrecv) + ->UseManualTime() + ->Unit(benchmark::kMicrosecond) + ->RangeMultiplier(8) + ->Range(1, 1 << 28); \ No newline at end of file diff --git a/perf_tests/test_osu_latency_isendirecv.cpp b/perf_tests/test_osu_latency_isendirecv.cpp deleted file mode 100644 index a62c8acc..00000000 --- a/perf_tests/test_osu_latency_isendirecv.cpp +++ /dev/null @@ -1,96 +0,0 @@ -//@HEADER -// ************************************************************************ -// -// Kokkos v. 4.0 -// Copyright (2022) National Technology & Engineering -// Solutions of Sandia, LLC (NTESS). -// -// Under the terms of Contract DE-NA0003525 with NTESS, -// the U.S. Government retains certain rights in this software. -// -// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. -// See https://kokkos.org/LICENSE for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//@HEADER - -// Adapted from the OSU Benchmarks -// Copyright (c) 2002-2024 the Network-Based Computing Laboratory -// (NBCL), The Ohio State University. - -#include "test_utils.hpp" -#include "KokkosComm.hpp" - -template -void osu_latency_Kokkos_Comm_isendirecv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, - const View &v) { - KokkosComm::Handle<> h{space, comm}; - - if (rank == 0) { - KokkosComm::wait(KokkosComm::isend(h, v, 1, 1)); - } else if (rank == 1) { - KokkosComm::wait(KokkosComm::irecv(h, v, 0, 1)); - } -} - -template -void osu_latency_MPI_isendirecv(benchmark::State &, MPI_Comm comm, int rank, const View &v) { - MPI_Request sendreq, recvreq; - if (rank == 0) { - MPI_Irecv(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 1, 0, comm, &recvreq); - MPI_Wait(&recvreq, MPI_STATUS_IGNORE); - } else if (rank == 1) { - MPI_Isend(v.data(), v.size(), KokkosComm::Impl::mpi_type(), 0, 0, comm, &sendreq); - MPI_Wait(&sendreq, MPI_STATUS_IGNORE); - } -} - -void benchmark_osu_latency_KokkosComm_isendirecv(benchmark::State &state) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size != 2) { - state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); - } - - auto mode = KokkosComm::DefaultCommMode(); - auto space = Kokkos::DefaultExecutionSpace(); - using view_type = Kokkos::View; - view_type a("A", state.range(0)); - - while (state.KeepRunning()) { - do_iteration( - state, MPI_COMM_WORLD, - osu_latency_Kokkos_Comm_isendirecv, mode, - space, rank, a); - } - state.counters["bytes"] = a.size() * 2; -} - -void benchmark_osu_latency_MPI_isendirecv(benchmark::State &state) { - int rank, size; - MPI_Comm_rank(MPI_COMM_WORLD, &rank); - MPI_Comm_size(MPI_COMM_WORLD, &size); - if (size != 2) { - state.SkipWithError("benchmark_osu_latency_MPI needs exactly 2 ranks"); - } - - using view_type = Kokkos::View; - view_type a("A", state.range(0)); - - while (state.KeepRunning()) { - do_iteration(state, MPI_COMM_WORLD, osu_latency_MPI_isendirecv, rank, a); - } - state.counters["bytes"] = a.size() * 2; -} - -BENCHMARK(benchmark_osu_latency_KokkosComm_isendirecv) - ->UseManualTime() - ->Unit(benchmark::kMicrosecond) - ->RangeMultiplier(8) - ->Range(1, 1 << 28); -BENCHMARK(benchmark_osu_latency_MPI_isendirecv) - ->UseManualTime() - ->Unit(benchmark::kMicrosecond) - ->RangeMultiplier(8) - ->Range(1, 1 << 28); diff --git a/src/KokkosComm_fwd.hpp b/src/KokkosComm_fwd.hpp index 55acea88..b91f4a27 100644 --- a/src/KokkosComm_fwd.hpp +++ b/src/KokkosComm_fwd.hpp @@ -40,10 +40,10 @@ namespace Impl { template -struct Irecv; +struct Recv; template -struct Isend; +struct Send; template struct Barrier; diff --git a/src/KokkosComm_point_to_point.hpp b/src/KokkosComm_point_to_point.hpp index 65aefa66..5cdd9824 100644 --- a/src/KokkosComm_point_to_point.hpp +++ b/src/KokkosComm_point_to_point.hpp @@ -25,26 +25,26 @@ namespace KokkosComm { template -Req irecv(Handle &h, RecvView &rv, int src, int tag) { - return Impl::Irecv::execute(h, rv, src, tag); +Req recv(Handle &h, RecvView &rv, int src, int tag) { + return Impl::Recv::execute(h, rv, src, tag); } template -Req irecv(RecvView &rv, int src, int tag) { - return irecv(Handle{}, rv, src, tag); +Req recv(RecvView &rv, int src, int tag) { + return recv(Handle{}, rv, src, tag); } template -Req isend(Handle &h, SendView &sv, int dest, int tag) { - return Impl::Isend::execute(h, sv, dest, tag); +Req send(Handle &h, SendView &sv, int dest, int tag) { + return Impl::Send::execute(h, sv, dest, tag); } template -Req isend(SendView &sv, int dest, int tag) { - return isend(Handle{}, sv, dest, tag); +Req send(SendView &sv, int dest, int tag) { + return send(Handle{}, sv, dest, tag); } } // namespace KokkosComm diff --git a/src/mpi/KokkosComm_mpi_irecv.hpp b/src/mpi/KokkosComm_mpi_irecv.hpp index 9f0a1cd1..3504f185 100644 --- a/src/mpi/KokkosComm_mpi_irecv.hpp +++ b/src/mpi/KokkosComm_mpi_irecv.hpp @@ -21,9 +21,9 @@ namespace KokkosComm { namespace Impl { -// irecv implementation for Mpi +// Recv implementation for Mpi template -struct Irecv { +struct Recv { static Req execute(Handle &h, const RecvView &rv, int src, int tag) { using KCT = KokkosComm::Traits; using KCPT = KokkosComm::PackTraits; diff --git a/src/mpi/KokkosComm_mpi_isend.hpp b/src/mpi/KokkosComm_mpi_isend.hpp index deefbcf0..eea987bd 100644 --- a/src/mpi/KokkosComm_mpi_isend.hpp +++ b/src/mpi/KokkosComm_mpi_isend.hpp @@ -63,9 +63,9 @@ Req isend_impl(Handle &h, const SendView &sv, int dest, int return req; } -// Implementation of KokkosComm::Isend +// Implementation of KokkosComm::Send template -struct Isend { +struct Send { static Req execute(Handle &h, const SendView &sv, int dest, int tag) { return isend_impl(h, sv, dest, tag); } diff --git a/unit_tests/CMakeLists.txt b/unit_tests/CMakeLists.txt index f0df264c..6a13c96f 100644 --- a/unit_tests/CMakeLists.txt +++ b/unit_tests/CMakeLists.txt @@ -36,7 +36,7 @@ endif() # Kokkos Comm tests set(KOKKOSCOMM_TEST_SOURCES) list(APPEND KOKKOSCOMM_TEST_SOURCES test_main.cpp - test_isendirecv.cpp + test_sendrecv.cpp test_barrier.cpp ) diff --git a/unit_tests/mpi/test_sendrecv.cpp b/unit_tests/mpi/test_sendrecv.cpp index a88dec6f..306d2301 100644 --- a/unit_tests/mpi/test_sendrecv.cpp +++ b/unit_tests/mpi/test_sendrecv.cpp @@ -22,13 +22,13 @@ namespace { template -class SendRecv : public testing::Test { +class MpiSendRecv : public testing::Test { public: using Scalar = T; }; using ScalarTypes = ::testing::Types, Kokkos::complex>; -TYPED_TEST_SUITE(SendRecv, ScalarTypes); +TYPED_TEST_SUITE(MpiSendRecv, ScalarTypes); template void send_comm_mode_1d_contig() { @@ -88,27 +88,27 @@ void send_comm_mode_1d_noncontig() { } } -TYPED_TEST(SendRecv, 1D_contig_standard) { +TYPED_TEST(MpiSendRecv, 1D_contig_standard) { send_comm_mode_1d_contig(); } -TYPED_TEST(SendRecv, 1D_contig_ready) { +TYPED_TEST(MpiSendRecv, 1D_contig_ready) { send_comm_mode_1d_contig(); } -TYPED_TEST(SendRecv, 1D_contig_synchronous) { +TYPED_TEST(MpiSendRecv, 1D_contig_synchronous) { send_comm_mode_1d_contig(); } -TYPED_TEST(SendRecv, 1D_noncontig_standard) { +TYPED_TEST(MpiSendRecv, 1D_noncontig_standard) { send_comm_mode_1d_noncontig(); } -TYPED_TEST(SendRecv, 1D_noncontig_ready) { +TYPED_TEST(MpiSendRecv, 1D_noncontig_ready) { send_comm_mode_1d_noncontig(); } -TYPED_TEST(SendRecv, 1D_noncontig_synchronous) { +TYPED_TEST(MpiSendRecv, 1D_noncontig_synchronous) { send_comm_mode_1d_noncontig(); } diff --git a/unit_tests/test_isendirecv.cpp b/unit_tests/test_sendrecv.cpp similarity index 81% rename from unit_tests/test_isendirecv.cpp rename to unit_tests/test_sendrecv.cpp index 3bbd3ecb..bbea7214 100644 --- a/unit_tests/test_isendirecv.cpp +++ b/unit_tests/test_sendrecv.cpp @@ -23,14 +23,14 @@ namespace { template -class IsendIrecv : public testing::Test { +class SendRecv : public testing::Test { public: using Scalar = T; }; using ScalarTypes = ::testing::Types, Kokkos::complex, int, unsigned, int64_t, size_t>; -TYPED_TEST_SUITE(IsendIrecv, ScalarTypes); +TYPED_TEST_SUITE(SendRecv, ScalarTypes); template void test_1d(const View1D &a) { @@ -46,12 +46,10 @@ void test_1d(const View1D &a) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::Req req = KokkosComm::isend(h, a, dst, 0); - KokkosComm::wait(req); + KokkosComm::wait(KokkosComm::send(h, a, dst, 0)); } else if (1 == h.rank()) { - int src = 0; - KokkosComm::Req req = KokkosComm::irecv(h, a, src, 0); - KokkosComm::wait(req); + int src = 0; + KokkosComm::wait(KokkosComm::recv(h, a, src, 0)); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); @@ -76,12 +74,10 @@ void test_2d(const View2D &a) { int dst = 1; Kokkos::parallel_for( policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); - KokkosComm::Req req = KokkosComm::isend(h, a, dst, 0); - KokkosComm::wait(req); + KokkosComm::wait(KokkosComm::send(h, a, dst, 0)); } else if (1 == h.rank()) { - int src = 0; - KokkosComm::Req req = KokkosComm::irecv(h, a, src, 0); - KokkosComm::wait(req); + int src = 0; + KokkosComm::wait(KokkosComm::recv(h, a, src, 0)); int errs; Kokkos::parallel_reduce( policy, KOKKOS_LAMBDA(int i, int j, int &lsum) { lsum += a(i, j) != Scalar(i * a.extent(0) + j); }, errs); @@ -89,12 +85,12 @@ void test_2d(const View2D &a) { } } -TYPED_TEST(IsendIrecv, 1D_contig) { +TYPED_TEST(SendRecv, 1D_contig) { auto a = ViewBuilder::view(contig{}, "a", 1013); test_1d(a); } -TYPED_TEST(IsendIrecv, 2D_contig) { +TYPED_TEST(SendRecv, 2D_contig) { auto a = ViewBuilder::view(contig{}, "a", 137, 17); test_2d(a); } From 007ea48f07ab8dd56a4aa2fd78c696d4cb437879 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 11 Jul 2024 14:00:30 -0600 Subject: [PATCH 11/18] Transport -> CommunicationSpace --- docs/api/core_recv.cpp | 2 +- src/KokkosComm_collective.hpp | 6 +++--- src/KokkosComm_concepts.hpp | 4 ++-- src/KokkosComm_fwd.hpp | 14 +++++++------- src/KokkosComm_point_to_point.hpp | 24 ++++++++++++------------ src/mpi/KokkosComm_mpi.hpp | 4 ++-- src/mpi/KokkosComm_mpi_barrier.hpp | 2 +- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/docs/api/core_recv.cpp b/docs/api/core_recv.cpp index c561fac3..32e2599f 100644 --- a/docs/api/core_recv.cpp +++ b/docs/api/core_recv.cpp @@ -1,4 +1,4 @@ Handle<> handle; Kokkos::View recv_view("recv_view", 100); -auto req = irecv(handle, recv_view, 1/*src*/, 0/*tag*/); +auto req = recv(handle, recv_view, 1/*src*/, 0/*tag*/); KokkosComm::wait(req); \ No newline at end of file diff --git a/src/KokkosComm_collective.hpp b/src/KokkosComm_collective.hpp index f4d9942b..b61b5f14 100644 --- a/src/KokkosComm_collective.hpp +++ b/src/KokkosComm_collective.hpp @@ -25,9 +25,9 @@ namespace KokkosComm { -template -void barrier(Handle &&h) { - Impl::Barrier{std::forward>(h)}; +template +void barrier(Handle &&h) { + Impl::Barrier{std::forward>(h)}; } } // namespace KokkosComm diff --git a/src/KokkosComm_concepts.hpp b/src/KokkosComm_concepts.hpp index acf5c1c0..19be6697 100644 --- a/src/KokkosComm_concepts.hpp +++ b/src/KokkosComm_concepts.hpp @@ -25,7 +25,7 @@ namespace KokkosComm { namespace Impl { // fallback - most types are not a KokkosComm transport template -struct is_transport : public std::false_type {}; +struct is_communication_space : public std::false_type {}; } // namespace Impl template @@ -35,6 +35,6 @@ template concept KokkosExecutionSpace = Kokkos::is_execution_space_v; template -concept Transport = KokkosComm::Impl::is_transport::value; +concept CommunicationSpace = KokkosComm::Impl::is_communication_space::value; } // namespace KokkosComm diff --git a/src/KokkosComm_fwd.hpp b/src/KokkosComm_fwd.hpp index b91f4a27..a9dcbcbc 100644 --- a/src/KokkosComm_fwd.hpp +++ b/src/KokkosComm_fwd.hpp @@ -24,27 +24,27 @@ namespace KokkosComm { #if defined(KOKKOSCOMM_ENABLE_MPI) class Mpi; -using DefaultTransport = Mpi; -using FallbackTransport = Mpi; +using DefaultCommunicationSpace = Mpi; +using FallbackCommunicationSpace = Mpi; #else #error at least one transport must be defined #endif -template +template class Req; -template +template class Handle; namespace Impl { template + CommunicationSpace CommSpace = DefaultCommunicationSpace> struct Recv; template + CommunicationSpace CommSpace = DefaultCommunicationSpace> struct Send; -template +template struct Barrier; } // namespace Impl diff --git a/src/KokkosComm_point_to_point.hpp b/src/KokkosComm_point_to_point.hpp index 5cdd9824..d9577d48 100644 --- a/src/KokkosComm_point_to_point.hpp +++ b/src/KokkosComm_point_to_point.hpp @@ -24,27 +24,27 @@ namespace KokkosComm { template -Req recv(Handle &h, RecvView &rv, int src, int tag) { - return Impl::Recv::execute(h, rv, src, tag); + CommunicationSpace CommSpace = DefaultCommunicationSpace> +Req recv(Handle &h, RecvView &rv, int src, int tag) { + return Impl::Recv::execute(h, rv, src, tag); } template -Req recv(RecvView &rv, int src, int tag) { - return recv(Handle{}, rv, src, tag); + CommunicationSpace CommSpace = DefaultCommunicationSpace> +Req recv(RecvView &rv, int src, int tag) { + return recv(Handle{}, rv, src, tag); } template -Req send(Handle &h, SendView &sv, int dest, int tag) { - return Impl::Send::execute(h, sv, dest, tag); + CommunicationSpace CommSpace = DefaultCommunicationSpace> +Req send(Handle &h, SendView &sv, int dest, int tag) { + return Impl::Send::execute(h, sv, dest, tag); } template -Req send(SendView &sv, int dest, int tag) { - return send(Handle{}, sv, dest, tag); + CommunicationSpace CommSpace = DefaultCommunicationSpace> +Req send(SendView &sv, int dest, int tag) { + return send(Handle{}, sv, dest, tag); } } // namespace KokkosComm diff --git a/src/mpi/KokkosComm_mpi.hpp b/src/mpi/KokkosComm_mpi.hpp index 67a06a48..c6db892a 100644 --- a/src/mpi/KokkosComm_mpi.hpp +++ b/src/mpi/KokkosComm_mpi.hpp @@ -41,8 +41,8 @@ struct Mpi { }; // struct Mpi -// KokkosComm::Mpi is a KokkosComm::Transport +// KokkosComm::Mpi is a KokkosComm::CommunicationSpace template <> -struct Impl::is_transport : public std::true_type {}; +struct Impl::is_communication_space : public std::true_type {}; } // namespace KokkosComm \ No newline at end of file diff --git a/src/mpi/KokkosComm_mpi_barrier.hpp b/src/mpi/KokkosComm_mpi_barrier.hpp index ef3ec5ec..58beb4bd 100644 --- a/src/mpi/KokkosComm_mpi_barrier.hpp +++ b/src/mpi/KokkosComm_mpi_barrier.hpp @@ -21,7 +21,7 @@ namespace KokkosComm { namespace Impl { -template +template struct Barrier { Barrier(Handle &&h) { h.space().fence("KokkosComm::Impl::Barrier"); From 497f33c554f863492d15921b2733a66e3f51c3a8 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Thu, 11 Jul 2024 15:07:24 -0600 Subject: [PATCH 12/18] Communication mode rework --- perf_tests/test_2dhalo.cpp | 6 +-- perf_tests/test_osu_latency.cpp | 1 - perf_tests/test_sendrecv.cpp | 14 +++--- src/KokkosComm_collective.hpp | 3 +- src/KokkosComm_fwd.hpp | 6 ++- src/mpi/KokkosComm_mpi_commmode.hpp | 68 ++++++++++++++++++++--------- src/mpi/KokkosComm_mpi_isend.hpp | 29 ++++++------ src/mpi/KokkosComm_mpi_send.hpp | 34 ++++++++------- unit_tests/mpi/test_isendrecv.cpp | 28 ++++++------ unit_tests/mpi/test_sendrecv.cpp | 28 ++++++------ 10 files changed, 123 insertions(+), 94 deletions(-) diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/test_2dhalo.cpp index 073eb683..fe2c7faa 100644 --- a/perf_tests/test_2dhalo.cpp +++ b/perf_tests/test_2dhalo.cpp @@ -82,14 +82,12 @@ void benchmark_2dhalo(benchmark::State &state) { const int ry = rank / rs; if (rank < rs * rs) { - auto mode = KokkosComm::DefaultCommMode(); auto space = Kokkos::DefaultExecutionSpace(); // grid of elements, each with 3 properties, and a radius-1 halo grid_type grid("", nx + 2, ny + 2, nprops); while (state.KeepRunning()) { - do_iteration(state, MPI_COMM_WORLD, - send_recv, mode, space, nx, - ny, rx, ry, rs, grid); + do_iteration(state, MPI_COMM_WORLD, send_recv, space, nx, ny, rx, ry, + rs, grid); } } else { while (state.KeepRunning()) { diff --git a/perf_tests/test_osu_latency.cpp b/perf_tests/test_osu_latency.cpp index 68a6a248..2758f993 100644 --- a/perf_tests/test_osu_latency.cpp +++ b/perf_tests/test_osu_latency.cpp @@ -63,7 +63,6 @@ void benchmark_osu_latency_Kokkos_Comm_mpi_sendrecv(benchmark::State &state) { state.SkipWithError("benchmark_osu_latency_KokkosComm needs exactly 2 ranks"); } - auto mode = KokkosComm::DefaultCommMode(); auto space = Kokkos::DefaultExecutionSpace(); using view_type = Kokkos::View; view_type a("A", state.range(0)); diff --git a/perf_tests/test_sendrecv.cpp b/perf_tests/test_sendrecv.cpp index ae39a747..11fb220e 100644 --- a/perf_tests/test_sendrecv.cpp +++ b/perf_tests/test_sendrecv.cpp @@ -18,14 +18,14 @@ #include "KokkosComm.hpp" -template -void send_recv(benchmark::State &, MPI_Comm comm, const Mode &mode, const Space &space, int rank, const View &v) { +template +void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, const View &v) { if (0 == rank) { - KokkosComm::mpi::send(space, v, 1, 0, comm); + KokkosComm::mpi::send(space, v, 1, 0, comm, Mode{}); KokkosComm::mpi::recv(space, v, 1, 0, comm); } else if (1 == rank) { KokkosComm::mpi::recv(space, v, 0, 0, comm); - KokkosComm::mpi::send(space, v, 0, 0, comm); + KokkosComm::mpi::send(space, v, 0, 0, comm, Mode{}); } } @@ -39,15 +39,13 @@ void benchmark_sendrecv(benchmark::State &state) { using Scalar = double; - auto mode = KokkosComm::DefaultCommMode(); + using Mode = KokkosComm::mpi::DefaultCommMode; auto space = Kokkos::DefaultExecutionSpace(); using view_type = Kokkos::View; view_type a("", 1000000); while (state.KeepRunning()) { - do_iteration(state, MPI_COMM_WORLD, - send_recv, mode, space, rank, - a); + do_iteration(state, MPI_COMM_WORLD, send_recv, space, rank, a); } state.SetBytesProcessed(sizeof(Scalar) * state.iterations() * a.size() * 2); diff --git a/src/KokkosComm_collective.hpp b/src/KokkosComm_collective.hpp index b61b5f14..3e2f6e6a 100644 --- a/src/KokkosComm_collective.hpp +++ b/src/KokkosComm_collective.hpp @@ -25,7 +25,8 @@ namespace KokkosComm { -template +template void barrier(Handle &&h) { Impl::Barrier{std::forward>(h)}; } diff --git a/src/KokkosComm_fwd.hpp b/src/KokkosComm_fwd.hpp index a9dcbcbc..354056b1 100644 --- a/src/KokkosComm_fwd.hpp +++ b/src/KokkosComm_fwd.hpp @@ -33,7 +33,8 @@ using FallbackCommunicationSpace = Mpi; template class Req; -template +template class Handle; namespace Impl { @@ -44,7 +45,8 @@ struct Recv; template struct Send; -template +template struct Barrier; } // namespace Impl diff --git a/src/mpi/KokkosComm_mpi_commmode.hpp b/src/mpi/KokkosComm_mpi_commmode.hpp index 489bfe01..d0249d9d 100644 --- a/src/mpi/KokkosComm_mpi_commmode.hpp +++ b/src/mpi/KokkosComm_mpi_commmode.hpp @@ -16,26 +16,52 @@ #pragma once -namespace KokkosComm::mpi { -// Scoped enumeration to specify the communication mode of a sending operation. +#include + // See section 3.4 of the MPI standard for a complete specification. -enum class CommMode { - // Default mode: lets the user override the send operations behavior at - // compile-time. E.g., this can be set to mode "Synchronous" for debug - // builds by defining KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE. - Default, - // Standard mode: MPI implementation decides whether outgoing messages will - // be buffered. Send operations can be started whether or not a matching - // receive has been started. They may complete before a matching receive is - // started. Standard mode is non-local: successful completion of the send - // operation may depend on the occurrence of a matching receive. - Standard, - // Ready mode: Send operations may be started only if the matching receive is - // already started. - Ready, - // Synchronous mode: Send operations complete successfully only if a matching - // receive is started, and the receive operation has started to receive the - // message sent. - Synchronous, -}; + +namespace KokkosComm::mpi { +// Standard mode: MPI implementation decides whether outgoing messages will +// be buffered. Send operations can be started whether or not a matching +// receive has been started. They may complete before a matching receive is +// started. Standard mode is non-local: successful completion of the send +// operation may depend on the occurrence of a matching receive. +struct CommModeStandard {}; + +// Ready mode: Send operations may be started only if the matching receive is +// already started. +struct CommModeReady {}; + +// Synchronous mode: Send operations complete successfully only if a matching +// receive is started, and the receive operation has started to receive the +// message sent. +struct CommModeSynchronous {}; + +// Default mode: lets the user override the send operations behavior at +// compile-time. E.g., this can be set to mode "Synchronous" for debug +// builds by defining KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE. +#ifdef KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE +using DefaultCommMode = CommModeSynchronous; +#else +using DefaultCommMode = CommModeStandard; +#endif + +template +struct is_communication_mode : std::false_type {}; + +template <> +struct is_communication_mode : std::true_type {}; + +template <> +struct is_communication_mode : std::true_type {}; + +template <> +struct is_communication_mode : std::true_type {}; + +template +inline constexpr bool is_communication_mode_v = is_communication_mode::value; + +template +concept CommunicationMode = is_communication_mode_v; + } // namespace KokkosComm::mpi \ No newline at end of file diff --git a/src/mpi/KokkosComm_mpi_isend.hpp b/src/mpi/KokkosComm_mpi_isend.hpp index eea987bd..ccea4578 100644 --- a/src/mpi/KokkosComm_mpi_isend.hpp +++ b/src/mpi/KokkosComm_mpi_isend.hpp @@ -26,22 +26,18 @@ namespace KokkosComm { namespace Impl { -template -Req isend_impl(Handle &h, const SendView &sv, int dest, int tag) { +template +Req isend_impl(Handle &h, const SendView &sv, int dest, int tag, SendMode) { auto mpi_isend_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, MPI_Comm mpi_comm, MPI_Request *mpi_req) { - if constexpr (SendMode == mpi::CommMode::Standard) { + if constexpr (std::is_same_v) { MPI_Isend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } else if constexpr (SendMode == mpi::CommMode::Ready) { + } else if constexpr (std::is_same_v) { MPI_Irsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } else if constexpr (SendMode == mpi::CommMode::Synchronous) { + } else if constexpr (std::is_same_v) { MPI_Issend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); - } else if constexpr (SendMode == mpi::CommMode::Default) { -#ifdef KOKKOSCOMM_FORCE_SYNCHRONOUS_MODE - MPI_Issend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); -#else - MPI_Isend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm, mpi_req); -#endif + } else { + static_assert(std::is_void_v, "unexpected communication mode"); } }; @@ -67,7 +63,7 @@ Req isend_impl(Handle &h, const SendView &sv, int dest, int template struct Send { static Req execute(Handle &h, const SendView &sv, int dest, int tag) { - return isend_impl(h, sv, dest, tag); + return isend_impl(h, sv, dest, tag, mpi::DefaultCommMode{}); } }; @@ -75,9 +71,14 @@ struct Send { namespace mpi { -template +template +Req isend(Handle &h, const SendView &sv, int dest, int tag, SendMode) { + return KokkosComm::Impl::isend_impl(h, sv, dest, tag, SendMode{}); +} + +template Req isend(Handle &h, const SendView &sv, int dest, int tag) { - return KokkosComm::Impl::isend_impl(h, sv, dest, tag); + return isend(h, sv, dest, tag, DefaultCommMode{}); } template diff --git a/src/mpi/KokkosComm_mpi_send.hpp b/src/mpi/KokkosComm_mpi_send.hpp index e335ced1..3ac1a4c3 100644 --- a/src/mpi/KokkosComm_mpi_send.hpp +++ b/src/mpi/KokkosComm_mpi_send.hpp @@ -24,19 +24,21 @@ namespace KokkosComm::mpi { -template -void send(const SendMode &, const SendView &sv, int dest, int tag, MPI_Comm comm) { +template +void send(const SendView &sv, int dest, int tag, MPI_Comm comm, SendMode) { Kokkos::Tools::pushRegion("KokkosComm::Impl::send"); using KCT = typename KokkosComm::Traits; auto mpi_send_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, MPI_Comm mpi_comm) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { MPI_Send(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Rsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Ssend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); + } else { + static_assert(std::is_void_v, "unexpected communication mode"); } }; @@ -50,25 +52,22 @@ void send(const SendMode &, const SendView &sv, int dest, int tag, MPI_Comm comm Kokkos::Tools::popRegion(); } -template -void send(const SendView &sv, int dest, int tag, MPI_Comm comm) { - send(KokkosComm::DefaultCommMode(), sv, dest, tag, comm); -} - -template -void send(const SendMode &, const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { +template +void send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm, SendMode) { Kokkos::Tools::pushRegion("KokkosComm::Impl::send"); using Packer = typename KokkosComm::PackTraits::packer_type; auto mpi_send_fn = [](void *mpi_view, int mpi_count, MPI_Datatype mpi_datatype, int mpi_dest, int mpi_tag, MPI_Comm mpi_comm) { - if constexpr (std::is_same_v) { + if constexpr (std::is_same_v) { MPI_Send(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Rsend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); - } else if constexpr (std::is_same_v) { + } else if constexpr (std::is_same_v) { MPI_Ssend(mpi_view, mpi_count, mpi_datatype, mpi_dest, mpi_tag, mpi_comm); + } else { + static_assert(std::is_void_v, "unexpected communication mode"); } }; @@ -84,4 +83,9 @@ void send(const SendMode &, const ExecSpace &space, const SendView &sv, int dest Kokkos::Tools::popRegion(); } +template +void send(const ExecSpace &space, const SendView &sv, int dest, int tag, MPI_Comm comm) { + send(space, sv, dest, tag, comm, DefaultCommMode{}); +} + } // namespace KokkosComm::mpi diff --git a/unit_tests/mpi/test_isendrecv.cpp b/unit_tests/mpi/test_isendrecv.cpp index f2ffbbf4..bc03ef8f 100644 --- a/unit_tests/mpi/test_isendrecv.cpp +++ b/unit_tests/mpi/test_isendrecv.cpp @@ -21,6 +21,8 @@ namespace { +using namespace KokkosComm::mpi; + template class IsendRecv : public testing::Test { public: @@ -31,9 +33,9 @@ using ScalarTypes = ::testing::Types, Kokkos::complex, int, unsigned, int64_t, size_t>; TYPED_TEST_SUITE(IsendRecv, ScalarTypes); -template +template void isend_comm_mode_1d_contig() { - if (IsendMode == KokkosComm::mpi::CommMode::Ready) { + if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -48,7 +50,7 @@ void isend_comm_mode_1d_contig() { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0); + KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0, IsendMode{}); KokkosComm::wait(req); } else if (1 == h.rank()) { int src = 0; @@ -60,9 +62,9 @@ void isend_comm_mode_1d_contig() { } } -template +template void isend_comm_mode_1d_noncontig() { - if (IsendMode == KokkosComm::mpi::CommMode::Ready) { + if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -79,7 +81,7 @@ void isend_comm_mode_1d_noncontig() { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0); + KokkosComm::Req req = KokkosComm::mpi::isend(h, a, dst, 0, IsendMode{}); KokkosComm::wait(req); } else if (1 == h.rank()) { int src = 0; @@ -92,27 +94,25 @@ void isend_comm_mode_1d_noncontig() { } TYPED_TEST(IsendRecv, 1D_contig_standard) { - isend_comm_mode_1d_contig(); + isend_comm_mode_1d_contig(); } -TYPED_TEST(IsendRecv, 1D_contig_ready) { - isend_comm_mode_1d_contig(); -} +TYPED_TEST(IsendRecv, 1D_contig_ready) { isend_comm_mode_1d_contig(); } TYPED_TEST(IsendRecv, 1D_contig_synchronous) { - isend_comm_mode_1d_contig(); + isend_comm_mode_1d_contig(); } TYPED_TEST(IsendRecv, 1D_noncontig_standard) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } TYPED_TEST(IsendRecv, 1D_noncontig_ready) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } TYPED_TEST(IsendRecv, 1D_noncontig_synchronous) { - isend_comm_mode_1d_noncontig(); + isend_comm_mode_1d_noncontig(); } } // namespace diff --git a/unit_tests/mpi/test_sendrecv.cpp b/unit_tests/mpi/test_sendrecv.cpp index 306d2301..19a9e455 100644 --- a/unit_tests/mpi/test_sendrecv.cpp +++ b/unit_tests/mpi/test_sendrecv.cpp @@ -21,6 +21,8 @@ namespace { +using namespace KokkosComm::mpi; + template class MpiSendRecv : public testing::Test { public: @@ -30,9 +32,9 @@ class MpiSendRecv : public testing::Test { using ScalarTypes = ::testing::Types, Kokkos::complex>; TYPED_TEST_SUITE(MpiSendRecv, ScalarTypes); -template +template void send_comm_mode_1d_contig() { - if (SendMode == KokkosComm::mpi::CommMode::Ready) { + if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -49,7 +51,7 @@ void send_comm_mode_1d_contig() { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); + KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD, SendMode{}); } else if (1 == rank) { int src = 0; KokkosComm::mpi::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); @@ -60,9 +62,9 @@ void send_comm_mode_1d_contig() { } } -template +template void send_comm_mode_1d_noncontig() { - if (SendMode == KokkosComm::mpi::CommMode::Ready) { + if constexpr (std::is_same_v) { GTEST_SKIP() << "Skipping test for ready-mode send"; } @@ -77,7 +79,7 @@ void send_comm_mode_1d_noncontig() { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD); + KokkosComm::mpi::send(Kokkos::DefaultExecutionSpace(), a, dst, 0, MPI_COMM_WORLD, SendMode{}); } else if (1 == rank) { int src = 0; KokkosComm::mpi::recv(Kokkos::DefaultExecutionSpace(), a, src, 0, MPI_COMM_WORLD); @@ -89,27 +91,25 @@ void send_comm_mode_1d_noncontig() { } TYPED_TEST(MpiSendRecv, 1D_contig_standard) { - send_comm_mode_1d_contig(); + send_comm_mode_1d_contig(); } -TYPED_TEST(MpiSendRecv, 1D_contig_ready) { - send_comm_mode_1d_contig(); -} +TYPED_TEST(MpiSendRecv, 1D_contig_ready) { send_comm_mode_1d_contig(); } TYPED_TEST(MpiSendRecv, 1D_contig_synchronous) { - send_comm_mode_1d_contig(); + send_comm_mode_1d_contig(); } TYPED_TEST(MpiSendRecv, 1D_noncontig_standard) { - send_comm_mode_1d_noncontig(); + send_comm_mode_1d_noncontig(); } TYPED_TEST(MpiSendRecv, 1D_noncontig_ready) { - send_comm_mode_1d_noncontig(); + send_comm_mode_1d_noncontig(); } TYPED_TEST(MpiSendRecv, 1D_noncontig_synchronous) { - send_comm_mode_1d_noncontig(); + send_comm_mode_1d_noncontig(); } } // namespace From 9c8066a82280d4cb5130d50ebb1d20f96571f7a1 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 24 Jul 2024 12:12:47 -0600 Subject: [PATCH 13/18] Fix includes --- src/KokkosComm_traits.hpp | 4 ++-- src/mpi/impl/KokkosComm_pack_traits.hpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/KokkosComm_traits.hpp b/src/KokkosComm_traits.hpp index d66e6be9..2b18b1d4 100644 --- a/src/KokkosComm_traits.hpp +++ b/src/KokkosComm_traits.hpp @@ -20,10 +20,10 @@ #pragma once -#include "impl/KokkosComm_concepts.hpp" - #include +#include "KokkosComm_concepts.hpp" + namespace KokkosComm { template diff --git a/src/mpi/impl/KokkosComm_pack_traits.hpp b/src/mpi/impl/KokkosComm_pack_traits.hpp index 553b217f..8f1610c3 100644 --- a/src/mpi/impl/KokkosComm_pack_traits.hpp +++ b/src/mpi/impl/KokkosComm_pack_traits.hpp @@ -17,9 +17,9 @@ #pragma once #include "KokkosComm_traits.hpp" +#include "KokkosComm_concepts.hpp" -#include "impl/KokkosComm_concepts.hpp" -#include "impl/KokkosComm_packer.hpp" +#include "KokkosComm_packer.hpp" namespace KokkosComm { From 780d653dc4c1cf1dbd77e663858277a40f50e60b Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 24 Jul 2024 12:46:10 -0600 Subject: [PATCH 14/18] Build tests in compile-only test --- .github/workflows/linux-compileonly.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/linux-compileonly.yaml b/.github/workflows/linux-compileonly.yaml index 43a199e2..78cfa52c 100644 --- a/.github/workflows/linux-compileonly.yaml +++ b/.github/workflows/linux-compileonly.yaml @@ -66,5 +66,5 @@ jobs: cmake --build "$KOKKOS_BUILD" --parallel $(nproc) -t install - name: Build Kokkos Comm run: | - cmake -S "$COMM_SRC" -B "$COMM_BUILD" -DCMAKE_CXX_COMPILER="$KOKKOS_SRC/bin/nvcc_wrapper" -DKokkos_ROOT="$KOKKOS_INSTALL" -DCMAKE_BUILD_TYPE=Release + cmake -S "$COMM_SRC" -B "$COMM_BUILD" -DCMAKE_CXX_COMPILER="$KOKKOS_SRC/bin/nvcc_wrapper" -DKokkos_ROOT="$KOKKOS_INSTALL" -DCMAKE_BUILD_TYPE=Release -DKokkosComm_ENABLE_TESTS=ON -DKokkosComm_ENABLE_PERFTESTS=ON VERBOSE=1 cmake --build "$COMM_BUILD" From 595794ac47633e2c894a12df71451c0caf5f4a38 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 24 Jul 2024 12:49:29 -0600 Subject: [PATCH 15/18] Config.cmake.in: propagate KOKKOSCOMM_ENABLE_MPI --- cmake/Config.cmake.in | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cmake/Config.cmake.in b/cmake/Config.cmake.in index 877f3e24..47388a45 100644 --- a/cmake/Config.cmake.in +++ b/cmake/Config.cmake.in @@ -8,6 +8,8 @@ include(CMakeFindDependencyMacro) find_dependency(MPI) find_dependency(Kokkos) +set(KOKKOSCOMM_ENABLE_MPI @KOKKOSCOMM_ENABLE_MPI@) + ## FIXME: do we need this? set(KokkosComm_INCLUDE_DIR "@CMAKE_INSTALL_FULL_INCLUDEDIR@" ) set(KokkosComm_DATA_DIR "@CMAKE_INSTALL_PREFIX@/@RELATIVE_DATA_INSTALL_DIR@" ) From dc7464219c0643eda48668df8fa98bc99238e170 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Mon, 12 Aug 2024 09:15:01 -0600 Subject: [PATCH 16/18] Remove tags from core interface --- docs/api/core.rst | 12 ++++-------- docs/api/core_recv.cpp | 2 +- docs/api/core_send.cpp | 5 ++--- perf_tests/test_2dhalo.cpp | 18 +++++++++--------- perf_tests/test_osu_latency.cpp | 4 ++-- src/KokkosComm_point_to_point.hpp | 16 ++++++++-------- src/mpi/KokkosComm_mpi_irecv.hpp | 7 ++++--- src/mpi/KokkosComm_mpi_isend.hpp | 5 +++-- src/mpi/impl/KokkosComm_tags.hpp | 5 +++++ unit_tests/test_sendrecv.cpp | 8 ++++---- 10 files changed, 42 insertions(+), 40 deletions(-) create mode 100644 src/mpi/impl/KokkosComm_tags.hpp diff --git a/docs/api/core.rst b/docs/api/core.rst index e2fd6b88..743ea41b 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -6,7 +6,7 @@ Point-to-point .. cpp:namespace:: KokkosComm -.. cpp:function:: template Req send(Handle &h, SendView &sv, int dest, int tag) +.. cpp:function:: template Req send(Handle &h, SendView &sv, int dest) Initiates a non-blocking send operation. @@ -20,11 +20,10 @@ Point-to-point :param h: A handle to the execution space and transport mechanism. :param sv: The Kokkos view to send. :param dest: The destination rank. - :param tag: The message tag. :return: A request object for the non-blocking send operation. -.. cpp:function:: template Req send(SendView &sv, int dest, int tag) +.. cpp:function:: template Req send(SendView &sv, int dest) Initiates a non-blocking send operation using a default handle. @@ -37,7 +36,6 @@ Point-to-point :param sv: The Kokkos view to send. :param dest: The destination rank. - :param tag: The message tag. :return: A request object for the non-blocking send operation. @@ -48,7 +46,7 @@ Point-to-point -.. cpp:function:: template Req recv(Handle &h, RecvView &rv, int src, int tag) +.. cpp:function:: template Req recv(Handle &h, RecvView &rv, int src) Initiates a non-blocking receive operation. @@ -62,7 +60,6 @@ Point-to-point :param h: A handle to the execution space and transport mechanism. :param rv: The Kokkos view where the received data will be stored. :param src: The source rank from which to receive data. - :param tag: The message tag to identify the communication. :return: A request object of type `Req` representing the non-blocking receive operation. @@ -77,7 +74,7 @@ Point-to-point -.. cpp:function:: template Req recv(RecvView &rv, int src, int tag) +.. cpp:function:: template Req recv(RecvView &rv, int src) Initiates a non-blocking receive operation using a default handle. @@ -90,7 +87,6 @@ Point-to-point :param rv: The Kokkos view where the received data will be stored. :param src: The source rank from which to receive data. - :param tag: The message tag to identify the communication. :return: A request object of type `Req` representing the non-blocking receive operation. diff --git a/docs/api/core_recv.cpp b/docs/api/core_recv.cpp index 32e2599f..8b949bca 100644 --- a/docs/api/core_recv.cpp +++ b/docs/api/core_recv.cpp @@ -1,4 +1,4 @@ Handle<> handle; Kokkos::View recv_view("recv_view", 100); -auto req = recv(handle, recv_view, 1/*src*/, 0/*tag*/); +auto req = recv(handle, recv_view, 1/*src*/); KokkosComm::wait(req); \ No newline at end of file diff --git a/docs/api/core_send.cpp b/docs/api/core_send.cpp index 516c0797..b23d28fe 100644 --- a/docs/api/core_send.cpp +++ b/docs/api/core_send.cpp @@ -14,16 +14,15 @@ Kokkos::parallel_for("fill_data", Kokkos::RangePolicy(0, 100), KOKKOS // Destination rank and message tag int dest = 1; -int tag = 42; // Create a handle KokkosComm::Handle<> handle; // Same as Handle // Initiate a non-blocking send with a handle -auto req1 = send(handle, data, dest, tag); +auto req1 = send(handle, data, dest); // Initiate a non-blocking send with a default handle -auto req2 = send(data, dest, tag); +auto req2 = send(data, dest); // Wait for the requests to complete (assuming a wait function exists) KokkosComm::wait(req1); diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/test_2dhalo.cpp index fe2c7faa..3c67e901 100644 --- a/perf_tests/test_2dhalo.cpp +++ b/perf_tests/test_2dhalo.cpp @@ -50,15 +50,15 @@ void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int nx, in std::vector> reqs; // std::cerr << get_rank(rx, ry) << " -> " << get_rank(xp1, ry) << "\n"; - reqs.push_back(KokkosComm::send(h, xp1_s, get_rank(xp1, ry), 0)); - reqs.push_back(KokkosComm::send(h, xm1_s, get_rank(xm1, ry), 1)); - reqs.push_back(KokkosComm::send(h, yp1_s, get_rank(rx, yp1), 2)); - reqs.push_back(KokkosComm::send(h, ym1_s, get_rank(rx, ym1), 3)); - - reqs.push_back(KokkosComm::recv(h, xm1_r, get_rank(xm1, ry), 0)); - reqs.push_back(KokkosComm::recv(h, xp1_r, get_rank(xp1, ry), 1)); - reqs.push_back(KokkosComm::recv(h, ym1_r, get_rank(rx, ym1), 2)); - reqs.push_back(KokkosComm::recv(h, yp1_r, get_rank(rx, yp1), 3)); + reqs.push_back(KokkosComm::send(h, xp1_s, get_rank(xp1, ry))); + reqs.push_back(KokkosComm::send(h, xm1_s, get_rank(xm1, ry))); + reqs.push_back(KokkosComm::send(h, yp1_s, get_rank(rx, yp1))); + reqs.push_back(KokkosComm::send(h, ym1_s, get_rank(rx, ym1))); + + reqs.push_back(KokkosComm::recv(h, xm1_r, get_rank(xm1, ry))); + reqs.push_back(KokkosComm::recv(h, xp1_r, get_rank(xp1, ry))); + reqs.push_back(KokkosComm::recv(h, ym1_r, get_rank(rx, ym1))); + reqs.push_back(KokkosComm::recv(h, yp1_r, get_rank(rx, yp1))); // wait for comm KokkosComm::wait_all(reqs); diff --git a/perf_tests/test_osu_latency.cpp b/perf_tests/test_osu_latency.cpp index 2758f993..a6628ed0 100644 --- a/perf_tests/test_osu_latency.cpp +++ b/perf_tests/test_osu_latency.cpp @@ -24,9 +24,9 @@ template void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm, KokkosComm::Handle<> &h, const View &v) { if (h.rank() == 0) { - KokkosComm::wait(KokkosComm::send(h, v, 1, 1)); + KokkosComm::wait(KokkosComm::send(h, v, 1)); } else if (h.rank() == 1) { - KokkosComm::wait(KokkosComm::recv(h, v, 0, 1)); + KokkosComm::wait(KokkosComm::recv(h, v, 0)); } } diff --git a/src/KokkosComm_point_to_point.hpp b/src/KokkosComm_point_to_point.hpp index d9577d48..e53b7181 100644 --- a/src/KokkosComm_point_to_point.hpp +++ b/src/KokkosComm_point_to_point.hpp @@ -25,26 +25,26 @@ namespace KokkosComm { template -Req recv(Handle &h, RecvView &rv, int src, int tag) { - return Impl::Recv::execute(h, rv, src, tag); +Req recv(Handle &h, RecvView &rv, int src) { + return Impl::Recv::execute(h, rv, src); } template -Req recv(RecvView &rv, int src, int tag) { - return recv(Handle{}, rv, src, tag); +Req recv(RecvView &rv, int src) { + return recv(Handle{}, rv, src); } template -Req send(Handle &h, SendView &sv, int dest, int tag) { - return Impl::Send::execute(h, sv, dest, tag); +Req send(Handle &h, SendView &sv, int dest) { + return Impl::Send::execute(h, sv, dest); } template -Req send(SendView &sv, int dest, int tag) { - return send(Handle{}, sv, dest, tag); +Req send(SendView &sv, int dest) { + return send(Handle{}, sv, dest); } } // namespace KokkosComm diff --git a/src/mpi/KokkosComm_mpi_irecv.hpp b/src/mpi/KokkosComm_mpi_irecv.hpp index 3504f185..1881957c 100644 --- a/src/mpi/KokkosComm_mpi_irecv.hpp +++ b/src/mpi/KokkosComm_mpi_irecv.hpp @@ -17,6 +17,7 @@ #pragma once #include "KokkosComm_mpi.hpp" +#include "impl/KokkosComm_tags.hpp" namespace KokkosComm { @@ -24,7 +25,7 @@ namespace Impl { // Recv implementation for Mpi template struct Recv { - static Req execute(Handle &h, const RecvView &rv, int src, int tag) { + static Req execute(Handle &h, const RecvView &rv, int src) { using KCT = KokkosComm::Traits; using KCPT = KokkosComm::PackTraits; using Packer = typename KCPT::packer_type; @@ -35,13 +36,13 @@ struct Recv { Req req; if (KokkosComm::is_contiguous(rv)) { space.fence("fence before irecv"); - MPI_Irecv(KokkosComm::data_handle(rv), 1, view_mpi_type(rv), src, tag, h.mpi_comm(), + MPI_Irecv(KokkosComm::data_handle(rv), 1, view_mpi_type(rv), src, POINTTOPOINT_TAG, h.mpi_comm(), &req.mpi_request()); // TODO: probably best to just use the scalar type req.extend_view_lifetime(rv); } else { Args args = Packer::allocate_packed_for(space, "TODO", rv); space.fence("fence before irecv"); - MPI_Irecv(args.view.data(), args.count, args.datatype, src, tag, h.mpi_comm(), &req.mpi_request()); + MPI_Irecv(args.view.data(), args.count, args.datatype, src, POINTTOPOINT_TAG, h.mpi_comm(), &req.mpi_request()); req.extend_view_lifetime(rv); // implicitly extends args.view lifetime since lambda holds a copy req.call_after_mpi_wait([=]() { Packer::unpack_into(space, rv, args.view); }); diff --git a/src/mpi/KokkosComm_mpi_isend.hpp b/src/mpi/KokkosComm_mpi_isend.hpp index ccea4578..ef8f542d 100644 --- a/src/mpi/KokkosComm_mpi_isend.hpp +++ b/src/mpi/KokkosComm_mpi_isend.hpp @@ -20,6 +20,7 @@ #include "KokkosComm_mpi.hpp" #include "impl/KokkosComm_types.hpp" +#include "impl/KokkosComm_tags.hpp" #include "KokkosComm_mpi_commmode.hpp" namespace KokkosComm { @@ -62,8 +63,8 @@ Req isend_impl(Handle &h, const SendView &sv, int dest, int // Implementation of KokkosComm::Send template struct Send { - static Req execute(Handle &h, const SendView &sv, int dest, int tag) { - return isend_impl(h, sv, dest, tag, mpi::DefaultCommMode{}); + static Req execute(Handle &h, const SendView &sv, int dest) { + return isend_impl(h, sv, dest, POINTTOPOINT_TAG, mpi::DefaultCommMode{}); } }; diff --git a/src/mpi/impl/KokkosComm_tags.hpp b/src/mpi/impl/KokkosComm_tags.hpp new file mode 100644 index 00000000..e1230bb0 --- /dev/null +++ b/src/mpi/impl/KokkosComm_tags.hpp @@ -0,0 +1,5 @@ +#pragma once + +namespace KokkosComm::Impl { +constexpr int POINTTOPOINT_TAG = 17; +} \ No newline at end of file diff --git a/unit_tests/test_sendrecv.cpp b/unit_tests/test_sendrecv.cpp index bbea7214..ac18e63a 100644 --- a/unit_tests/test_sendrecv.cpp +++ b/unit_tests/test_sendrecv.cpp @@ -46,10 +46,10 @@ void test_1d(const View1D &a) { int dst = 1; Kokkos::parallel_for( a.extent(0), KOKKOS_LAMBDA(const int i) { a(i) = i; }); - KokkosComm::wait(KokkosComm::send(h, a, dst, 0)); + KokkosComm::wait(KokkosComm::send(h, a, dst)); } else if (1 == h.rank()) { int src = 0; - KokkosComm::wait(KokkosComm::recv(h, a, src, 0)); + KokkosComm::wait(KokkosComm::recv(h, a, src)); int errs; Kokkos::parallel_reduce( a.extent(0), KOKKOS_LAMBDA(const int &i, int &lsum) { lsum += a(i) != Scalar(i); }, errs); @@ -74,10 +74,10 @@ void test_2d(const View2D &a) { int dst = 1; Kokkos::parallel_for( policy, KOKKOS_LAMBDA(int i, int j) { a(i, j) = i * a.extent(0) + j; }); - KokkosComm::wait(KokkosComm::send(h, a, dst, 0)); + KokkosComm::wait(KokkosComm::send(h, a, dst)); } else if (1 == h.rank()) { int src = 0; - KokkosComm::wait(KokkosComm::recv(h, a, src, 0)); + KokkosComm::wait(KokkosComm::recv(h, a, src)); int errs; Kokkos::parallel_reduce( policy, KOKKOS_LAMBDA(int i, int j, int &lsum) { lsum += a(i, j) != Scalar(i * a.extent(0) + j); }, errs); From 2c2bc5f863f276ddc156991634304aeb57a95cc7 Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Mon, 12 Aug 2024 11:20:55 -0600 Subject: [PATCH 17/18] Move headers to KokkosComm/ --- perf_tests/test_2dhalo.cpp | 2 +- perf_tests/test_main.cpp | 2 +- perf_tests/test_osu_latency.cpp | 2 +- perf_tests/test_sendrecv.cpp | 2 +- perf_tests/test_utils.hpp | 2 +- src/{ => KokkosComm}/KokkosComm.hpp | 28 +++++++++---------- .../collective.hpp} | 4 +-- .../comm_modes.hpp} | 0 .../concepts.hpp} | 0 .../fwd.hpp} | 2 +- .../impl/KokkosComm_contiguous.hpp | 4 +-- .../mpi/allgather.hpp} | 8 +++--- .../mpi/alltoall.hpp} | 8 +++--- .../mpi/barrier.hpp} | 2 +- .../mpi/commmode.hpp} | 0 .../mpi/handle.hpp} | 4 +-- .../mpi/impl/include_mpi.hpp} | 0 .../mpi/impl/pack_traits.hpp} | 6 ++-- .../mpi/impl/packer.hpp} | 8 +++--- .../mpi/impl/tags.hpp} | 0 .../mpi/impl/types.hpp} | 2 +- .../mpi/irecv.hpp} | 4 +-- .../mpi/isend.hpp} | 10 +++---- .../mpi/mpi.hpp} | 4 +-- .../mpi/recv.hpp} | 8 +++--- .../mpi/reduce.hpp} | 8 +++--- .../mpi/req.hpp} | 4 +-- .../mpi/send.hpp} | 6 ++-- .../point_to_point.hpp} | 4 +-- .../traits.hpp} | 4 +-- unit_tests/mpi/test_allgather.cpp | 2 +- unit_tests/mpi/test_alltoall.cpp | 2 +- unit_tests/mpi/test_isendrecv.cpp | 2 +- unit_tests/mpi/test_reduce.cpp | 2 +- unit_tests/mpi/test_sendrecv.cpp | 2 +- unit_tests/test_barrier.cpp | 2 +- unit_tests/test_main.cpp | 2 +- unit_tests/test_sendrecv.cpp | 2 +- 38 files changed, 77 insertions(+), 77 deletions(-) rename src/{ => KokkosComm}/KokkosComm.hpp (61%) rename src/{KokkosComm_collective.hpp => KokkosComm/collective.hpp} (93%) rename src/{KokkosComm_comm_modes.hpp => KokkosComm/comm_modes.hpp} (100%) rename src/{KokkosComm_concepts.hpp => KokkosComm/concepts.hpp} (100%) rename src/{KokkosComm_fwd.hpp => KokkosComm/fwd.hpp} (97%) rename src/{ => KokkosComm}/impl/KokkosComm_contiguous.hpp (97%) rename src/{mpi/KokkosComm_mpi_allgather.hpp => KokkosComm/mpi/allgather.hpp} (95%) rename src/{mpi/KokkosComm_mpi_alltoall.hpp => KokkosComm/mpi/alltoall.hpp} (96%) rename src/{mpi/KokkosComm_mpi_barrier.hpp => KokkosComm/mpi/barrier.hpp} (96%) rename src/{mpi/KokkosComm_mpi_commmode.hpp => KokkosComm/mpi/commmode.hpp} (100%) rename src/{mpi/KokkosComm_mpi_handle.hpp => KokkosComm/mpi/handle.hpp} (95%) rename src/{mpi/impl/KokkosComm_include_mpi.hpp => KokkosComm/mpi/impl/include_mpi.hpp} (100%) rename src/{mpi/impl/KokkosComm_pack_traits.hpp => KokkosComm/mpi/impl/pack_traits.hpp} (90%) rename src/{mpi/impl/KokkosComm_packer.hpp => KokkosComm/mpi/impl/packer.hpp} (96%) rename src/{mpi/impl/KokkosComm_tags.hpp => KokkosComm/mpi/impl/tags.hpp} (100%) rename src/{mpi/impl/KokkosComm_types.hpp => KokkosComm/mpi/impl/types.hpp} (99%) rename src/{mpi/KokkosComm_mpi_irecv.hpp => KokkosComm/mpi/irecv.hpp} (97%) rename src/{mpi/KokkosComm_mpi_isend.hpp => KokkosComm/mpi/isend.hpp} (95%) rename src/{mpi/KokkosComm_mpi.hpp => KokkosComm/mpi/mpi.hpp} (93%) rename src/{mpi/KokkosComm_mpi_recv.hpp => KokkosComm/mpi/recv.hpp} (93%) rename src/{mpi/KokkosComm_mpi_reduce.hpp => KokkosComm/mpi/reduce.hpp} (95%) rename src/{mpi/KokkosComm_mpi_req.hpp => KokkosComm/mpi/req.hpp} (97%) rename src/{mpi/KokkosComm_mpi_send.hpp => KokkosComm/mpi/send.hpp} (96%) rename src/{KokkosComm_point_to_point.hpp => KokkosComm/point_to_point.hpp} (96%) rename src/{KokkosComm_traits.hpp => KokkosComm/traits.hpp} (96%) diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/test_2dhalo.cpp index 3c67e901..4cb4f324 100644 --- a/perf_tests/test_2dhalo.cpp +++ b/perf_tests/test_2dhalo.cpp @@ -16,7 +16,7 @@ #include "test_utils.hpp" -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" #include diff --git a/perf_tests/test_main.cpp b/perf_tests/test_main.cpp index 4a78cab0..cc087348 100644 --- a/perf_tests/test_main.cpp +++ b/perf_tests/test_main.cpp @@ -14,7 +14,7 @@ // //@HEADER -#include "mpi/impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/mpi/impl/include_mpi.hpp" #include #include diff --git a/perf_tests/test_osu_latency.cpp b/perf_tests/test_osu_latency.cpp index a6628ed0..e9ce1f0f 100644 --- a/perf_tests/test_osu_latency.cpp +++ b/perf_tests/test_osu_latency.cpp @@ -19,7 +19,7 @@ // (NBCL), The Ohio State University. #include "test_utils.hpp" -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" template void osu_latency_Kokkos_Comm_sendrecv(benchmark::State &, MPI_Comm, KokkosComm::Handle<> &h, const View &v) { diff --git a/perf_tests/test_sendrecv.cpp b/perf_tests/test_sendrecv.cpp index 11fb220e..c81ae24f 100644 --- a/perf_tests/test_sendrecv.cpp +++ b/perf_tests/test_sendrecv.cpp @@ -16,7 +16,7 @@ #include "test_utils.hpp" -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" template void send_recv(benchmark::State &, MPI_Comm comm, const Space &space, int rank, const View &v) { diff --git a/perf_tests/test_utils.hpp b/perf_tests/test_utils.hpp index 5df3b826..c022f91c 100644 --- a/perf_tests/test_utils.hpp +++ b/perf_tests/test_utils.hpp @@ -20,7 +20,7 @@ #include -#include "mpi/impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/mpi/impl/include_mpi.hpp" // F is a function that takes (state, MPI_Comm, args...) template diff --git a/src/KokkosComm.hpp b/src/KokkosComm/KokkosComm.hpp similarity index 61% rename from src/KokkosComm.hpp rename to src/KokkosComm/KokkosComm.hpp index cdb33a43..b69f58bc 100644 --- a/src/KokkosComm.hpp +++ b/src/KokkosComm/KokkosComm.hpp @@ -16,28 +16,28 @@ #pragma once -#include "KokkosComm_fwd.hpp" +#include "fwd.hpp" // transport declarations // TODO: could probably be moved to a per-transport file to be included #if defined(KOKKOSCOMM_ENABLE_MPI) -#include "mpi/KokkosComm_mpi.hpp" -#include "mpi/KokkosComm_mpi_send.hpp" -#include "mpi/KokkosComm_mpi_allgather.hpp" -#include "mpi/KokkosComm_mpi_alltoall.hpp" -#include "mpi/KokkosComm_mpi_barrier.hpp" -#include "mpi/KokkosComm_mpi_handle.hpp" -#include "mpi/KokkosComm_mpi_irecv.hpp" -#include "mpi/KokkosComm_mpi_isend.hpp" -#include "mpi/KokkosComm_mpi_recv.hpp" -#include "mpi/KokkosComm_mpi_reduce.hpp" +#include "mpi/mpi.hpp" +#include "mpi/send.hpp" +#include "mpi/allgather.hpp" +#include "mpi/alltoall.hpp" +#include "mpi/barrier.hpp" +#include "mpi/handle.hpp" +#include "mpi/irecv.hpp" +#include "mpi/isend.hpp" +#include "mpi/recv.hpp" +#include "mpi/reduce.hpp" #else #error at least one transport must be defined #endif -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_point_to_point.hpp" -#include "KokkosComm_collective.hpp" +#include "concepts.hpp" +#include "point_to_point.hpp" +#include "collective.hpp" #include diff --git a/src/KokkosComm_collective.hpp b/src/KokkosComm/collective.hpp similarity index 93% rename from src/KokkosComm_collective.hpp rename to src/KokkosComm/collective.hpp index 3e2f6e6a..c7125c0c 100644 --- a/src/KokkosComm_collective.hpp +++ b/src/KokkosComm/collective.hpp @@ -20,8 +20,8 @@ #include -#include "KokkosComm_fwd.hpp" -#include "KokkosComm_concepts.hpp" +#include "fwd.hpp" +#include "concepts.hpp" namespace KokkosComm { diff --git a/src/KokkosComm_comm_modes.hpp b/src/KokkosComm/comm_modes.hpp similarity index 100% rename from src/KokkosComm_comm_modes.hpp rename to src/KokkosComm/comm_modes.hpp diff --git a/src/KokkosComm_concepts.hpp b/src/KokkosComm/concepts.hpp similarity index 100% rename from src/KokkosComm_concepts.hpp rename to src/KokkosComm/concepts.hpp diff --git a/src/KokkosComm_fwd.hpp b/src/KokkosComm/fwd.hpp similarity index 97% rename from src/KokkosComm_fwd.hpp rename to src/KokkosComm/fwd.hpp index 354056b1..0bd3c255 100644 --- a/src/KokkosComm_fwd.hpp +++ b/src/KokkosComm/fwd.hpp @@ -18,7 +18,7 @@ #include -#include "KokkosComm_concepts.hpp" +#include "concepts.hpp" #include "KokkosComm_config.hpp" namespace KokkosComm { diff --git a/src/impl/KokkosComm_contiguous.hpp b/src/KokkosComm/impl/KokkosComm_contiguous.hpp similarity index 97% rename from src/impl/KokkosComm_contiguous.hpp rename to src/KokkosComm/impl/KokkosComm_contiguous.hpp index 65ff9cfd..4caa7f10 100644 --- a/src/impl/KokkosComm_contiguous.hpp +++ b/src/KokkosComm/impl/KokkosComm_contiguous.hpp @@ -20,8 +20,8 @@ #include -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_traits.hpp" +#include "concepts.hpp" +#include "traits.hpp" namespace KokkosComm::Impl { diff --git a/src/mpi/KokkosComm_mpi_allgather.hpp b/src/KokkosComm/mpi/allgather.hpp similarity index 95% rename from src/mpi/KokkosComm_mpi_allgather.hpp rename to src/KokkosComm/mpi/allgather.hpp index f248d058..f3e930dc 100644 --- a/src/mpi/KokkosComm_mpi_allgather.hpp +++ b/src/KokkosComm/mpi/allgather.hpp @@ -18,10 +18,10 @@ #include -#include "KokkosComm_traits.hpp" -#include "impl/KokkosComm_pack_traits.hpp" -#include "impl/KokkosComm_include_mpi.hpp" -#include "impl/KokkosComm_types.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" +#include "impl/types.hpp" namespace KokkosComm::mpi { diff --git a/src/mpi/KokkosComm_mpi_alltoall.hpp b/src/KokkosComm/mpi/alltoall.hpp similarity index 96% rename from src/mpi/KokkosComm_mpi_alltoall.hpp rename to src/KokkosComm/mpi/alltoall.hpp index a2bdf7d3..b2b29512 100644 --- a/src/mpi/KokkosComm_mpi_alltoall.hpp +++ b/src/KokkosComm/mpi/alltoall.hpp @@ -36,10 +36,10 @@ #include -#include "KokkosComm_traits.hpp" -#include "impl/KokkosComm_pack_traits.hpp" -#include "impl/KokkosComm_include_mpi.hpp" -#include "impl/KokkosComm_types.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" +#include "impl/types.hpp" namespace KokkosComm::Impl { template diff --git a/src/mpi/KokkosComm_mpi_barrier.hpp b/src/KokkosComm/mpi/barrier.hpp similarity index 96% rename from src/mpi/KokkosComm_mpi_barrier.hpp rename to src/KokkosComm/mpi/barrier.hpp index 58beb4bd..736f5c86 100644 --- a/src/mpi/KokkosComm_mpi_barrier.hpp +++ b/src/KokkosComm/mpi/barrier.hpp @@ -16,7 +16,7 @@ #pragma once -#include "KokkosComm_concepts.hpp" +#include "KokkosComm/concepts.hpp" namespace KokkosComm { diff --git a/src/mpi/KokkosComm_mpi_commmode.hpp b/src/KokkosComm/mpi/commmode.hpp similarity index 100% rename from src/mpi/KokkosComm_mpi_commmode.hpp rename to src/KokkosComm/mpi/commmode.hpp diff --git a/src/mpi/KokkosComm_mpi_handle.hpp b/src/KokkosComm/mpi/handle.hpp similarity index 95% rename from src/mpi/KokkosComm_mpi_handle.hpp rename to src/KokkosComm/mpi/handle.hpp index b4633da4..192beeb5 100644 --- a/src/mpi/KokkosComm_mpi_handle.hpp +++ b/src/KokkosComm/mpi/handle.hpp @@ -16,9 +16,9 @@ #pragma once -#include "KokkosComm_fwd.hpp" +#include "KokkosComm/fwd.hpp" -#include "KokkosComm_mpi_req.hpp" +#include "req.hpp" namespace KokkosComm { diff --git a/src/mpi/impl/KokkosComm_include_mpi.hpp b/src/KokkosComm/mpi/impl/include_mpi.hpp similarity index 100% rename from src/mpi/impl/KokkosComm_include_mpi.hpp rename to src/KokkosComm/mpi/impl/include_mpi.hpp diff --git a/src/mpi/impl/KokkosComm_pack_traits.hpp b/src/KokkosComm/mpi/impl/pack_traits.hpp similarity index 90% rename from src/mpi/impl/KokkosComm_pack_traits.hpp rename to src/KokkosComm/mpi/impl/pack_traits.hpp index 8f1610c3..e5c8c0cb 100644 --- a/src/mpi/impl/KokkosComm_pack_traits.hpp +++ b/src/KokkosComm/mpi/impl/pack_traits.hpp @@ -16,10 +16,10 @@ #pragma once -#include "KokkosComm_traits.hpp" -#include "KokkosComm_concepts.hpp" +#include "KokkosComm/traits.hpp" +#include "KokkosComm/concepts.hpp" -#include "KokkosComm_packer.hpp" +#include "packer.hpp" namespace KokkosComm { diff --git a/src/mpi/impl/KokkosComm_packer.hpp b/src/KokkosComm/mpi/impl/packer.hpp similarity index 96% rename from src/mpi/impl/KokkosComm_packer.hpp rename to src/KokkosComm/mpi/impl/packer.hpp index 56d43c88..e972d6ad 100644 --- a/src/mpi/impl/KokkosComm_packer.hpp +++ b/src/KokkosComm/mpi/impl/packer.hpp @@ -16,10 +16,10 @@ #pragma once -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_traits.hpp" -#include "KokkosComm_types.hpp" -#include "KokkosComm_include_mpi.hpp" +#include "KokkosComm/concepts.hpp" +#include "KokkosComm/traits.hpp" +#include "types.hpp" +#include "include_mpi.hpp" // todo: redo this using KokkosComm_contiguous diff --git a/src/mpi/impl/KokkosComm_tags.hpp b/src/KokkosComm/mpi/impl/tags.hpp similarity index 100% rename from src/mpi/impl/KokkosComm_tags.hpp rename to src/KokkosComm/mpi/impl/tags.hpp diff --git a/src/mpi/impl/KokkosComm_types.hpp b/src/KokkosComm/mpi/impl/types.hpp similarity index 99% rename from src/mpi/impl/KokkosComm_types.hpp rename to src/KokkosComm/mpi/impl/types.hpp index 4e46ce70..a2b798db 100644 --- a/src/mpi/impl/KokkosComm_types.hpp +++ b/src/KokkosComm/mpi/impl/types.hpp @@ -16,7 +16,7 @@ #pragma once -#include "KokkosComm_include_mpi.hpp" +#include "include_mpi.hpp" #include diff --git a/src/mpi/KokkosComm_mpi_irecv.hpp b/src/KokkosComm/mpi/irecv.hpp similarity index 97% rename from src/mpi/KokkosComm_mpi_irecv.hpp rename to src/KokkosComm/mpi/irecv.hpp index 1881957c..cd167a9c 100644 --- a/src/mpi/KokkosComm_mpi_irecv.hpp +++ b/src/KokkosComm/mpi/irecv.hpp @@ -16,8 +16,8 @@ #pragma once -#include "KokkosComm_mpi.hpp" -#include "impl/KokkosComm_tags.hpp" +#include "mpi.hpp" +#include "impl/tags.hpp" namespace KokkosComm { diff --git a/src/mpi/KokkosComm_mpi_isend.hpp b/src/KokkosComm/mpi/isend.hpp similarity index 95% rename from src/mpi/KokkosComm_mpi_isend.hpp rename to src/KokkosComm/mpi/isend.hpp index ef8f542d..09b301b7 100644 --- a/src/mpi/KokkosComm_mpi_isend.hpp +++ b/src/KokkosComm/mpi/isend.hpp @@ -16,12 +16,12 @@ #pragma once -#include "KokkosComm_traits.hpp" +#include "KokkosComm/traits.hpp" -#include "KokkosComm_mpi.hpp" -#include "impl/KokkosComm_types.hpp" -#include "impl/KokkosComm_tags.hpp" -#include "KokkosComm_mpi_commmode.hpp" +#include "mpi.hpp" +#include "impl/types.hpp" +#include "impl/tags.hpp" +#include "commmode.hpp" namespace KokkosComm { diff --git a/src/mpi/KokkosComm_mpi.hpp b/src/KokkosComm/mpi/mpi.hpp similarity index 93% rename from src/mpi/KokkosComm_mpi.hpp rename to src/KokkosComm/mpi/mpi.hpp index c6db892a..9a484db1 100644 --- a/src/mpi/KokkosComm_mpi.hpp +++ b/src/KokkosComm/mpi/mpi.hpp @@ -18,8 +18,8 @@ #include -#include "KokkosComm_concepts.hpp" -#include "impl/KokkosComm_include_mpi.hpp" +#include "../concepts.hpp" +#include "impl/include_mpi.hpp" namespace KokkosComm { diff --git a/src/mpi/KokkosComm_mpi_recv.hpp b/src/KokkosComm/mpi/recv.hpp similarity index 93% rename from src/mpi/KokkosComm_mpi_recv.hpp rename to src/KokkosComm/mpi/recv.hpp index d04d9625..10a9c97a 100644 --- a/src/mpi/KokkosComm_mpi_recv.hpp +++ b/src/KokkosComm/mpi/recv.hpp @@ -18,10 +18,10 @@ #include -#include "KokkosComm_concepts.hpp" -#include "KokkosComm_traits.hpp" -#include "impl/KokkosComm_pack_traits.hpp" -#include "impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/concepts.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" namespace KokkosComm::mpi { diff --git a/src/mpi/KokkosComm_mpi_reduce.hpp b/src/KokkosComm/mpi/reduce.hpp similarity index 95% rename from src/mpi/KokkosComm_mpi_reduce.hpp rename to src/KokkosComm/mpi/reduce.hpp index 1bc95ad5..056a2f03 100644 --- a/src/mpi/KokkosComm_mpi_reduce.hpp +++ b/src/KokkosComm/mpi/reduce.hpp @@ -18,10 +18,10 @@ #include -#include "KokkosComm_traits.hpp" -#include "impl/KokkosComm_pack_traits.hpp" -#include "impl/KokkosComm_include_mpi.hpp" -#include "impl/KokkosComm_types.hpp" +#include "KokkosComm/traits.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" +#include "impl/types.hpp" namespace KokkosComm::mpi { diff --git a/src/mpi/KokkosComm_mpi_req.hpp b/src/KokkosComm/mpi/req.hpp similarity index 97% rename from src/mpi/KokkosComm_mpi_req.hpp rename to src/KokkosComm/mpi/req.hpp index 1e027b2e..72d4b1cb 100644 --- a/src/mpi/KokkosComm_mpi_req.hpp +++ b/src/KokkosComm/mpi/req.hpp @@ -20,9 +20,9 @@ #include #include -#include "KokkosComm_fwd.hpp" +#include "KokkosComm/fwd.hpp" -#include "KokkosComm_mpi.hpp" +#include "mpi.hpp" namespace KokkosComm { diff --git a/src/mpi/KokkosComm_mpi_send.hpp b/src/KokkosComm/mpi/send.hpp similarity index 96% rename from src/mpi/KokkosComm_mpi_send.hpp rename to src/KokkosComm/mpi/send.hpp index 3ac1a4c3..70a05f17 100644 --- a/src/mpi/KokkosComm_mpi_send.hpp +++ b/src/KokkosComm/mpi/send.hpp @@ -18,9 +18,9 @@ #include -#include "KokkosComm_mpi_commmode.hpp" -#include "impl/KokkosComm_pack_traits.hpp" -#include "impl/KokkosComm_include_mpi.hpp" +#include "commmode.hpp" +#include "impl/pack_traits.hpp" +#include "impl/include_mpi.hpp" namespace KokkosComm::mpi { diff --git a/src/KokkosComm_point_to_point.hpp b/src/KokkosComm/point_to_point.hpp similarity index 96% rename from src/KokkosComm_point_to_point.hpp rename to src/KokkosComm/point_to_point.hpp index e53b7181..64486d91 100644 --- a/src/KokkosComm_point_to_point.hpp +++ b/src/KokkosComm/point_to_point.hpp @@ -18,8 +18,8 @@ #include -#include "KokkosComm_fwd.hpp" -#include "KokkosComm_concepts.hpp" +#include "fwd.hpp" +#include "concepts.hpp" namespace KokkosComm { diff --git a/src/KokkosComm_traits.hpp b/src/KokkosComm/traits.hpp similarity index 96% rename from src/KokkosComm_traits.hpp rename to src/KokkosComm/traits.hpp index 2b18b1d4..55cba517 100644 --- a/src/KokkosComm_traits.hpp +++ b/src/KokkosComm/traits.hpp @@ -15,14 +15,14 @@ //@HEADER /*! \brief Defines a common interface for Kokkos::View-like types - \file KokkosComm_traits.hpp + \file traits.hpp */ #pragma once #include -#include "KokkosComm_concepts.hpp" +#include "concepts.hpp" namespace KokkosComm { diff --git a/unit_tests/mpi/test_allgather.cpp b/unit_tests/mpi/test_allgather.cpp index 18e0cedb..a7798abb 100644 --- a/unit_tests/mpi/test_allgather.cpp +++ b/unit_tests/mpi/test_allgather.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { diff --git a/unit_tests/mpi/test_alltoall.cpp b/unit_tests/mpi/test_alltoall.cpp index db0103c2..da074c6d 100644 --- a/unit_tests/mpi/test_alltoall.cpp +++ b/unit_tests/mpi/test_alltoall.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { diff --git a/unit_tests/mpi/test_isendrecv.cpp b/unit_tests/mpi/test_isendrecv.cpp index bc03ef8f..c74179bb 100644 --- a/unit_tests/mpi/test_isendrecv.cpp +++ b/unit_tests/mpi/test_isendrecv.cpp @@ -17,7 +17,7 @@ #include #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { diff --git a/unit_tests/mpi/test_reduce.cpp b/unit_tests/mpi/test_reduce.cpp index 6eafcd48..82db420a 100644 --- a/unit_tests/mpi/test_reduce.cpp +++ b/unit_tests/mpi/test_reduce.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { diff --git a/unit_tests/mpi/test_sendrecv.cpp b/unit_tests/mpi/test_sendrecv.cpp index 19a9e455..c0031bba 100644 --- a/unit_tests/mpi/test_sendrecv.cpp +++ b/unit_tests/mpi/test_sendrecv.cpp @@ -17,7 +17,7 @@ #include #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { diff --git a/unit_tests/test_barrier.cpp b/unit_tests/test_barrier.cpp index 11c9c42e..c3d57f30 100644 --- a/unit_tests/test_barrier.cpp +++ b/unit_tests/test_barrier.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" namespace { diff --git a/unit_tests/test_main.cpp b/unit_tests/test_main.cpp index 680e322b..de3bd1ac 100644 --- a/unit_tests/test_main.cpp +++ b/unit_tests/test_main.cpp @@ -23,7 +23,7 @@ #include #include -#include "mpi/impl/KokkosComm_include_mpi.hpp" +#include "KokkosComm/mpi/impl/include_mpi.hpp" class MpiEnvironment : public ::testing::Environment { public: diff --git a/unit_tests/test_sendrecv.cpp b/unit_tests/test_sendrecv.cpp index ac18e63a..dc779297 100644 --- a/unit_tests/test_sendrecv.cpp +++ b/unit_tests/test_sendrecv.cpp @@ -16,7 +16,7 @@ #include -#include "KokkosComm.hpp" +#include "KokkosComm/KokkosComm.hpp" #include "view_builder.hpp" From 7856b43dfb6c46515d7b23f3b00a5d834307e09a Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Mon, 12 Aug 2024 11:34:22 -0600 Subject: [PATCH 18/18] Move perf tests to mpi/ --- perf_tests/CMakeLists.txt | 53 +++++++++-------------- perf_tests/mpi/CMakeLists.txt | 11 +++++ perf_tests/{ => mpi}/test_2dhalo.cpp | 0 perf_tests/{ => mpi}/test_main.cpp | 0 perf_tests/{ => mpi}/test_osu_latency.cpp | 0 perf_tests/{ => mpi}/test_sendrecv.cpp | 0 perf_tests/{ => mpi}/test_utils.hpp | 0 7 files changed, 31 insertions(+), 33 deletions(-) create mode 100644 perf_tests/mpi/CMakeLists.txt rename perf_tests/{ => mpi}/test_2dhalo.cpp (100%) rename perf_tests/{ => mpi}/test_main.cpp (100%) rename perf_tests/{ => mpi}/test_osu_latency.cpp (100%) rename perf_tests/{ => mpi}/test_sendrecv.cpp (100%) rename perf_tests/{ => mpi}/test_utils.hpp (100%) diff --git a/perf_tests/CMakeLists.txt b/perf_tests/CMakeLists.txt index a95ff234..82194239 100644 --- a/perf_tests/CMakeLists.txt +++ b/perf_tests/CMakeLists.txt @@ -10,40 +10,27 @@ if (NOT TARGET KokkosComm::KokkosComm) find_package(KokkosComm REQUIRED) endif() -if (KOKKOSCOMM_ENABLE_MPI) - - include(FetchContent) - - # Avoid warning about DOWNLOAD_EXTRACT_TIMESTAMP in CMake 3.24: - if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") - cmake_policy(SET CMP0135 NEW) - endif() - - set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) - FetchContent_Declare( - benchmark - URL https://github.com/google/benchmark/archive/refs/tags/v1.8.3.zip - ) - # FetchContent_MakeAvailable(benchmark) - # was making install install benchmark as well - # EXCLUDE_FROM_ALL here seems to be the magic - if (NOT benchmark_POPULATED) - FetchContent_Populate(benchmark) - add_subdirectory(${benchmark_SOURCE_DIR} ${benchmark_BINARY_DIR} EXCLUDE_FROM_ALL) - endif() - unset(BENCHMARK_ENABLE_TESTING) +include(FetchContent) +# Avoid warning about DOWNLOAD_EXTRACT_TIMESTAMP in CMake 3.24: +if (CMAKE_VERSION VERSION_GREATER_EQUAL "3.24.0") +cmake_policy(SET CMP0135 NEW) +endif() - add_executable(perf_test-main test_main.cpp - test_sendrecv.cpp - test_2dhalo.cpp - test_osu_latency.cpp - ) - if(KOKKOSCOMM_ENABLE_TESTS) - kokkoscomm_add_cxx_flags(TARGET perf_test-main) - endif() - target_link_libraries(perf_test-main KokkosComm::KokkosComm benchmark::benchmark) - add_test(NAME perf_test-main - COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./perf_test-main) +set(BENCHMARK_ENABLE_TESTING OFF CACHE BOOL "" FORCE) +FetchContent_Declare( + benchmark + URL https://github.com/google/benchmark/archive/refs/tags/v1.8.3.zip +) +# FetchContent_MakeAvailable(benchmark) +# was making install install benchmark as well +# EXCLUDE_FROM_ALL here seems to be the magic +if (NOT benchmark_POPULATED) + FetchContent_Populate(benchmark) + add_subdirectory(${benchmark_SOURCE_DIR} ${benchmark_BINARY_DIR} EXCLUDE_FROM_ALL) +endif() +unset(BENCHMARK_ENABLE_TESTING) +if (KOKKOSCOMM_ENABLE_MPI) + add_subdirectory(mpi) endif(KOKKOSCOMM_ENABLE_MPI) diff --git a/perf_tests/mpi/CMakeLists.txt b/perf_tests/mpi/CMakeLists.txt new file mode 100644 index 00000000..0a08c203 --- /dev/null +++ b/perf_tests/mpi/CMakeLists.txt @@ -0,0 +1,11 @@ +add_executable(perf_test-main test_main.cpp + test_sendrecv.cpp + test_2dhalo.cpp + test_osu_latency.cpp + ) + if(KOKKOSCOMM_ENABLE_TESTS) + kokkoscomm_add_cxx_flags(TARGET perf_test-main) + endif() + target_link_libraries(perf_test-main KokkosComm::KokkosComm benchmark::benchmark) + add_test(NAME perf_test-main + COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} 2 ./perf_test-main) \ No newline at end of file diff --git a/perf_tests/test_2dhalo.cpp b/perf_tests/mpi/test_2dhalo.cpp similarity index 100% rename from perf_tests/test_2dhalo.cpp rename to perf_tests/mpi/test_2dhalo.cpp diff --git a/perf_tests/test_main.cpp b/perf_tests/mpi/test_main.cpp similarity index 100% rename from perf_tests/test_main.cpp rename to perf_tests/mpi/test_main.cpp diff --git a/perf_tests/test_osu_latency.cpp b/perf_tests/mpi/test_osu_latency.cpp similarity index 100% rename from perf_tests/test_osu_latency.cpp rename to perf_tests/mpi/test_osu_latency.cpp diff --git a/perf_tests/test_sendrecv.cpp b/perf_tests/mpi/test_sendrecv.cpp similarity index 100% rename from perf_tests/test_sendrecv.cpp rename to perf_tests/mpi/test_sendrecv.cpp diff --git a/perf_tests/test_utils.hpp b/perf_tests/mpi/test_utils.hpp similarity index 100% rename from perf_tests/test_utils.hpp rename to perf_tests/mpi/test_utils.hpp