From 3414c914b11e5fc116b501de8e78c98fcdfb023c Mon Sep 17 00:00:00 2001 From: Carl Pearson Date: Wed, 22 May 2024 10:03:40 -0600 Subject: [PATCH] SpMV: Test NaN, fix NaN handling when beta=0 (#2188) * Test_Sparse_spmv_bsr.hpp: add NaNs to tests * handle NaN in spmv_beta_transpose when beta=0 * handle nan in SpmvMergeHierarchical when beta=0 * Test NaNs in Y, don't reuse modifed Y, catch NaNs in results test * remove unused include * explicit casting of zero * Test_sparse_spmv.hpp: remove unused nans parameter * KokkosSparse_spmv.hpp: CUDA11 can't detect this function always returns * Test_Sparse_spmv.hpp: remove unused variable * Run unit tests in correct execution space * Test_Sparse_spmv.hpp: remove unused type aliases * Kokkos::nan() -> KokkosKernels::Impl::quiet_NaN() --- common/impl/KokkosKernels_NaN.hpp | 44 ++++++++++++ sparse/impl/KokkosSparse_spmv_impl.hpp | 18 +++-- sparse/impl/KokkosSparse_spmv_impl_merge.hpp | 6 +- sparse/src/KokkosSparse_spmv.hpp | 1 + sparse/unit_test/Test_Sparse_spmv.hpp | 76 +++++++++++++++----- sparse/unit_test/Test_Sparse_spmv_bsr.hpp | 62 +++++++++++++++- 6 files changed, 178 insertions(+), 29 deletions(-) create mode 100644 common/impl/KokkosKernels_NaN.hpp diff --git a/common/impl/KokkosKernels_NaN.hpp b/common/impl/KokkosKernels_NaN.hpp new file mode 100644 index 0000000000..f319539a9f --- /dev/null +++ b/common/impl/KokkosKernels_NaN.hpp @@ -0,0 +1,44 @@ +//@HEADER +// ************************************************************************ +// +// Kokkos v. 4.0 +// Copyright (2022) National Technology & Engineering +// Solutions of Sandia, LLC (NTESS). +// +// Under the terms of Contract DE-NA0003525 with NTESS, +// the U.S. Government retains certain rights in this software. +// +// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions. +// See https://kokkos.org/LICENSE for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//@HEADER + +#ifndef KOKKOSKERNELS_NAN_HPP +#define KOKKOSKERNELS_NAN_HPP + +#include +#include + +namespace KokkosKernels::Impl { + +// This could be constexpr if Kokkos::complex ctor was +template +KOKKOS_INLINE_FUNCTION T quiet_NaN() { + if constexpr (std::is_same_v) { + return double(Kokkos::Experimental::quiet_NaN_v< + float>); // Kokkos::Experimetnal::quiet_NaN_v + // is undefined in + // device code + } else if constexpr (Kokkos::ArithTraits::is_complex) { + using value_type = typename T::value_type; + return T(quiet_NaN(), + quiet_NaN()); // Kokkos::complex ctor is not constexpr + } else { + return Kokkos::Experimental::quiet_NaN_v; + } +} + +} // namespace KokkosKernels::Impl + +#endif // KOKKOSKERNELS_NAN_HPP diff --git a/sparse/impl/KokkosSparse_spmv_impl.hpp b/sparse/impl/KokkosSparse_spmv_impl.hpp index 5f9cbea040..a2bb19a44c 100644 --- a/sparse/impl/KokkosSparse_spmv_impl.hpp +++ b/sparse/impl/KokkosSparse_spmv_impl.hpp @@ -450,8 +450,9 @@ static void spmv_beta_transpose(const execution_space& exec, const AMatrix& A, const XVector& x, typename YVector::const_value_type& beta, const YVector& y) { - using ordinal_type = typename AMatrix::non_const_ordinal_type; - using size_type = typename AMatrix::non_const_size_type; + using ordinal_type = typename AMatrix::non_const_ordinal_type; + using size_type = typename AMatrix::non_const_size_type; + using y_scalar_type = typename YVector::non_const_value_type; if (A.numRows() <= static_cast(0)) { return; @@ -459,7 +460,9 @@ static void spmv_beta_transpose(const execution_space& exec, // We need to scale y first ("scaling" by zero just means filling // with zeros), since the functor works by atomic-adding into y. - if (dobeta != 1) { + if (0 == dobeta || y_scalar_type(0) == beta) { + Kokkos::deep_copy(exec, y, y_scalar_type(0)); + } else if (dobeta != 1) { KokkosBlas::scal(exec, y, beta, y); } @@ -540,8 +543,9 @@ static void spmv_beta_transpose(const execution_space& exec, const AMatrix& A, const XVector& x, typename YVector::const_value_type& beta, const YVector& y) { - using ordinal_type = typename AMatrix::non_const_ordinal_type; - using size_type = typename AMatrix::non_const_size_type; + using ordinal_type = typename AMatrix::non_const_ordinal_type; + using size_type = typename AMatrix::non_const_size_type; + using y_scalar_type = typename YVector::non_const_value_type; if (A.numRows() <= static_cast(0)) { return; @@ -549,7 +553,9 @@ static void spmv_beta_transpose(const execution_space& exec, // We need to scale y first ("scaling" by zero just means filling // with zeros), since the functor works by atomic-adding into y. - if (dobeta != 1) { + if (0 == dobeta || y_scalar_type(0) == beta) { + Kokkos::deep_copy(exec, y, y_scalar_type(0)); + } else if (dobeta != 1) { KokkosBlas::scal(exec, y, beta, y); } diff --git a/sparse/impl/KokkosSparse_spmv_impl_merge.hpp b/sparse/impl/KokkosSparse_spmv_impl_merge.hpp index c49519cc3a..1717d190be 100644 --- a/sparse/impl/KokkosSparse_spmv_impl_merge.hpp +++ b/sparse/impl/KokkosSparse_spmv_impl_merge.hpp @@ -309,7 +309,11 @@ struct SpmvMergeHierarchical { static_assert(XVector::rank == 1, ""); static_assert(YVector::rank == 1, ""); - KokkosBlas::scal(y, beta, y); + if (y_value_type(0) == beta) { + Kokkos::deep_copy(space, y, y_value_type(0)); + } else { + KokkosBlas::scal(space, y, beta, y); + } /* determine launch parameters for different architectures On architectures where there is a natural execution hierarchy with true diff --git a/sparse/src/KokkosSparse_spmv.hpp b/sparse/src/KokkosSparse_spmv.hpp index f11b61f675..b59124df20 100644 --- a/sparse/src/KokkosSparse_spmv.hpp +++ b/sparse/src/KokkosSparse_spmv.hpp @@ -62,6 +62,7 @@ inline constexpr bool spmv_general_tpl_avail() { return spmv_mv_bsrmatrix_tpl_spec_avail::value; } + return false; } } // namespace Impl diff --git a/sparse/unit_test/Test_Sparse_spmv.hpp b/sparse/unit_test/Test_Sparse_spmv.hpp index 9afd941c93..2057a8ba14 100644 --- a/sparse/unit_test/Test_Sparse_spmv.hpp +++ b/sparse/unit_test/Test_Sparse_spmv.hpp @@ -23,6 +23,7 @@ #include #include #include +#include #include "KokkosKernels_default_types.hpp" @@ -86,7 +87,10 @@ struct fSPMV { void operator()(const int i, value_type &err) const { const mag_type error = AT::abs(expected_y(i) - y(i)); - if (error > eps * max_val) { + // only one is NaN or error is too large + if ((Kokkos::isnan(AT::abs(expected_y(i))) ^ + Kokkos::isnan(AT::abs(y(i)))) || + (error > eps * max_val)) { err++; Kokkos::printf("expected_y(%d)=%f, y(%d)=%f err=%e, max_error=%e\n", i, AT::abs(expected_y(i)), i, AT::abs(y(i)), error, @@ -116,6 +120,7 @@ void sequential_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y, using size_type_view_t = typename graph_t::row_map_type; using lno_view_t = typename graph_t::entries_type; using scalar_view_t = typename crsMat_t::values_type::non_const_type; + using y_scalar_t = typename y_vector_type::non_const_value_type; using size_type = typename size_type_view_t::non_const_value_type; using lno_t = typename lno_view_t::non_const_value_type; @@ -145,7 +150,13 @@ void sequential_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y, lno_t nr = input_mat.numRows(); // first, scale y by beta - for (size_t i = 0; i < h_y.extent(0); i++) h_y(i) *= beta; + for (size_t i = 0; i < h_y.extent(0); i++) { + if (beta == y_scalar_t(0)) { + h_y(i) = y_scalar_t(0); + } else { + h_y(i) *= beta; + } + } // then go through the matrix and accumulate the matrix-vector product for (lno_t row = 0; row < nr; ++row) { @@ -184,17 +195,19 @@ void check_spmv( const y_value_mag_type eps = 10 * Kokkos::ArithTraits::eps(); - bool transposed = (mode == "T") || (mode == "H"); - y_vector_type expected_y( - "expected", transposed ? input_mat.numCols() : input_mat.numRows()); + + y_vector_type actual_y("actual_y", y.extent(0)); + y_vector_type expected_y("expected_y", y.extent(0)); Kokkos::deep_copy(expected_y, y); + Kokkos::deep_copy(actual_y, y); Kokkos::fence(); sequential_spmv(input_mat, x, expected_y, alpha, beta, mode); bool threw = false; std::string msg; try { - KokkosSparse::spmv(handle, mode.data(), alpha, input_mat, x, beta, y); + KokkosSparse::spmv(handle, mode.data(), alpha, input_mat, x, beta, + actual_y); Kokkos::fence(); } catch (std::exception &e) { threw = true; @@ -203,11 +216,11 @@ void check_spmv( ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 1D, mode " << mode << ": threw exception:\n" << msg << '\n'; + int num_errors = 0; Kokkos::parallel_reduce( - "KokkosSparse::Test::spmv", my_exec_space(0, y.extent(0)), - fSPMV(expected_y, y, eps, max_val), - num_errors); + "KokkosSparse::Test::spmv", my_exec_space(0, actual_y.extent(0)), + fSPMV(expected_y, actual_y, eps, max_val), num_errors); if (num_errors > 0) printf("KokkosSparse::Test::spmv: %i errors of %i with params: %lf %lf\n", num_errors, y.extent_int(0), y_value_trait::abs(alpha), @@ -266,10 +279,9 @@ void check_spmv_mv( auto y_spmv = Kokkos::subview(y, Kokkos::ALL(), i); int num_errors = 0; - Kokkos::parallel_reduce( - "KokkosSparse::Test::spmv_mv", my_exec_space(0, y_i.extent(0)), - fSPMV(y_i, y_spmv, eps, max_val), - num_errors); + Kokkos::parallel_reduce("KokkosSparse::Test::spmv_mv", + my_exec_space(0, y_i.extent(0)), + fSPMV(y_i, y_spmv, eps, max_val), num_errors); if (num_errors > 0) std::cout << "KokkosSparse::Test::spmv_mv: " << num_errors << " errors of " << y_i.extent_int(0) << " for mv " << i @@ -404,6 +416,7 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, using mag_t = typename Kokkos::ArithTraits::mag_type; using handle_t = KokkosSparse::SPMVHandle; + using y_policy = Kokkos::RangePolicy; constexpr mag_t max_x = static_cast(1); constexpr mag_t max_y = static_cast(1); @@ -419,18 +432,35 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, const lno_t max_nnz_per_row = numRows ? (nnz / numRows + row_size_variance) : 0; + // Create vectors with and without nans x_vector_type input_x("x", nc); - y_vector_type output_y("y", nr); x_vector_type input_xt("x", nr); - y_vector_type output_yt("y", nc); + y_vector_type input_y("y", nr), input_y_nans("y_nans", nr); + y_vector_type input_yt("y", nc), input_yt_nans("y_nans", nc); Kokkos::Random_XorShift64_Pool rand_pool( 13718); Kokkos::fill_random(input_x, rand_pool, randomUpperBound(max_x)); - Kokkos::fill_random(output_y, rand_pool, randomUpperBound(max_y)); + Kokkos::fill_random(input_y, rand_pool, randomUpperBound(max_y)); Kokkos::fill_random(input_xt, rand_pool, randomUpperBound(max_x)); - Kokkos::fill_random(output_yt, rand_pool, randomUpperBound(max_y)); + Kokkos::fill_random(input_yt, rand_pool, randomUpperBound(max_y)); + + // sprinkle in some nans + Kokkos::deep_copy(input_y_nans, input_y); + Kokkos::deep_copy(input_yt_nans, input_yt); + Kokkos::parallel_for( + y_policy(0, input_y_nans.extent(0)), KOKKOS_LAMBDA(const size_t i) { + if (0 == (i % 19)) { + input_y_nans(i) = KokkosKernels::Impl::quiet_NaN(); + } + }); + Kokkos::parallel_for( + y_policy(0, input_yt_nans.extent(0)), KOKKOS_LAMBDA(const size_t i) { + if (0 == (i % 23)) { + input_yt_nans(i) = KokkosKernels::Impl::quiet_NaN(); + } + }); // We also need to bound the values // in the matrix to bound the cancellations @@ -457,8 +487,12 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, for (double beta : testAlphaBeta) { mag_t max_error = beta * max_y + alpha * max_nnz_per_row * max_val * max_x; - Test::check_spmv(&handle, input_mat, input_x, output_y, alpha, beta, + Test::check_spmv(&handle, input_mat, input_x, input_y, alpha, beta, mode, max_error); + if (0 == beta) { + Test::check_spmv(&handle, input_mat, input_x, input_y_nans, alpha, + beta, mode, max_error); + } } } } @@ -468,8 +502,12 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz, // hoping the transpose won't have a long column... mag_t max_error = beta * max_y + alpha * max_nnz_per_row * max_val * max_x; - Test::check_spmv(&handle, input_mat, input_xt, output_yt, alpha, beta, + Test::check_spmv(&handle, input_mat, input_xt, input_yt, alpha, beta, mode, max_error); + if (0 == beta) { + Test::check_spmv(&handle, input_mat, input_x, input_yt_nans, alpha, + beta, mode, max_error); + } } } } diff --git a/sparse/unit_test/Test_Sparse_spmv_bsr.hpp b/sparse/unit_test/Test_Sparse_spmv_bsr.hpp index cb42f5c2e4..e9b23298f9 100644 --- a/sparse/unit_test/Test_Sparse_spmv_bsr.hpp +++ b/sparse/unit_test/Test_Sparse_spmv_bsr.hpp @@ -41,6 +41,7 @@ #include #include #include "KokkosKernels_default_types.hpp" +#include #include "KokkosSparse_spmv.hpp" #include "KokkosSparse_BsrMatrix.hpp" @@ -327,10 +328,15 @@ spmv_random(const char *mode, const int blockSize, const int blockRows, /*! \brief create random x and y multivectors for a given matrix and spmv mode */ template -auto random_vecs_for_spmv(const char *mode, const Bsr &a) { +auto random_vecs_for_spmv(const char *mode, const Bsr &a, + const bool nans = false) + -> std::tuple::type, + typename VectorTypeFor::type> { using scalar_type = typename Bsr::non_const_value_type; using vector_type = typename VectorTypeFor::type; using execution_space = typename Bsr::execution_space; + using policy_type = + Kokkos::RangePolicy; size_t nx = a.numCols() * a.blockDim(); size_t ny = a.numRows() * a.blockDim(); @@ -344,6 +350,21 @@ auto random_vecs_for_spmv(const char *mode, const Bsr &a) { Kokkos::fill_random(x, random, max_x()); Kokkos::fill_random(y, random, max_y()); + if (nans) { + Kokkos::parallel_for( + policy_type(0, x.extent(0)), KOKKOS_LAMBDA(size_t i) { + if (0 == (i % 17)) { + x(i) = KokkosKernels::Impl::quiet_NaN(); + } + }); + Kokkos::parallel_for( + policy_type(0, y.extent(0)), KOKKOS_LAMBDA(size_t i) { + if (0 == (i % 17)) { + y(i) = KokkosKernels::Impl::quiet_NaN(); + } + }); + } + return std::make_tuple(x, y); } @@ -356,7 +377,8 @@ void test_spmv_combos(const char *mode, const Bsr &a, const Crs &acrs, using scalar_type = typename Bsr::non_const_value_type; using execution_space = typename Bsr::execution_space; - auto [x, y] = random_vecs_for_spmv(mode, a); + auto [x, y] = random_vecs_for_spmv(mode, a); + auto [x_with_nans, y_with_nans] = random_vecs_for_spmv(mode, a, true); using handle_t = SPMVHandle; @@ -389,6 +411,10 @@ void test_spmv_combos(const char *mode, const Bsr &a, const Crs &acrs, for (scalar_type beta : {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(-1.5)}) { test_spmv(handle, mode, alpha, beta, a, acrs, maxNnzPerRow, x, y); + if (beta == scalar_type(0)) { + test_spmv(handle, mode, alpha, beta, a, acrs, maxNnzPerRow, + x_with_nans, y_with_nans); + } } } } @@ -563,10 +589,14 @@ struct MultiVectorTypeFor { */ template auto random_multivecs_for_spm_mv(const char *mode, const Bsr &a, - const size_t numVecs) { + const size_t numVecs, const bool nans = false) + -> std::tuple::type, + typename MultiVectorTypeFor::type> { using scalar_type = typename Bsr::non_const_value_type; using vector_type = typename MultiVectorTypeFor::type; using execution_space = typename Bsr::execution_space; + using policy_type = + Kokkos::RangePolicy; size_t nx = a.numCols() * a.blockDim(); size_t ny = a.numRows() * a.blockDim(); @@ -580,6 +610,26 @@ auto random_multivecs_for_spm_mv(const char *mode, const Bsr &a, Kokkos::fill_random(x, random, max_x()); Kokkos::fill_random(y, random, max_y()); + // sprinkle some "random" NaNs in + if (nans) { + Kokkos::parallel_for( + policy_type(0, x.extent(0)), KOKKOS_LAMBDA(size_t i) { + for (size_t j = 0; j < x.extent(1); ++j) { + if (0 == ((i * x.extent(1) + j) % 13)) { + x(i, j) = KokkosKernels::Impl::quiet_NaN(); + } + } + }); + Kokkos::parallel_for( + policy_type(0, y.extent(0)), KOKKOS_LAMBDA(size_t i) { + for (size_t j = 0; j < y.extent(1); ++j) { + if (0 == ((i * y.extent(1) + j) % 17)) { + y(i, j) = KokkosKernels::Impl::quiet_NaN(); + } + } + }); + } + return std::make_tuple(x, y); } @@ -618,12 +668,18 @@ void test_spm_mv_combos(const char *mode, const Bsr &a, const Crs &acrs, for (size_t numVecs : {1, 7}) { // num multivecs auto [x, y] = random_multivecs_for_spm_mv(mode, a, numVecs); + auto [x_with_nans, y_with_nans] = + random_multivecs_for_spm_mv(mode, a, numVecs, true); for (handle_t *handle : handles) { for (scalar_type alpha : {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(3.7)}) { for (scalar_type beta : {scalar_type(0), scalar_type(1), scalar_type(-1), scalar_type(-1.5)}) { test_spm_mv(handle, mode, alpha, beta, a, acrs, maxNnzPerRow, x, y); + if (beta == scalar_type(0)) { + test_spm_mv(handle, mode, alpha, beta, a, acrs, maxNnzPerRow, + x_with_nans, y_with_nans); + } } } }