Skip to content

Commit

Permalink
SpMV: Test NaN, fix NaN handling when beta=0 (kokkos#2188)
Browse files Browse the repository at this point in the history
* Test_Sparse_spmv_bsr.hpp: add NaNs to tests

* handle NaN in spmv_beta_transpose when beta=0

* handle nan in SpmvMergeHierarchical when beta=0

* Test NaNs in Y, don't reuse modifed Y, catch NaNs in results test

* remove unused <iostream> include

* explicit casting of zero

* Test_sparse_spmv.hpp: remove unused nans parameter

* KokkosSparse_spmv.hpp: CUDA11 can't detect this function always returns

* Test_Sparse_spmv.hpp: remove unused variable

* Run unit tests in correct execution space

* Test_Sparse_spmv.hpp: remove unused type aliases

* Kokkos::nan() -> KokkosKernels::Impl::quiet_NaN()
  • Loading branch information
cwpearson authored May 22, 2024
1 parent d7dace1 commit 3414c91
Show file tree
Hide file tree
Showing 6 changed files with 178 additions and 29 deletions.
44 changes: 44 additions & 0 deletions common/impl/KokkosKernels_NaN.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//@HEADER
// ************************************************************************
//
// Kokkos v. 4.0
// Copyright (2022) National Technology & Engineering
// Solutions of Sandia, LLC (NTESS).
//
// Under the terms of Contract DE-NA0003525 with NTESS,
// the U.S. Government retains certain rights in this software.
//
// Part of Kokkos, under the Apache License v2.0 with LLVM Exceptions.
// See https://kokkos.org/LICENSE for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//@HEADER

#ifndef KOKKOSKERNELS_NAN_HPP
#define KOKKOSKERNELS_NAN_HPP

#include <Kokkos_ArithTraits.hpp>
#include <Kokkos_NumericTraits.hpp>

namespace KokkosKernels::Impl {

// This could be constexpr if Kokkos::complex ctor was
template <typename T>
KOKKOS_INLINE_FUNCTION T quiet_NaN() {
if constexpr (std::is_same_v<double, T>) {
return double(Kokkos::Experimental::quiet_NaN_v<
float>); // Kokkos::Experimetnal::quiet_NaN_v<double>
// is undefined in
// device code
} else if constexpr (Kokkos::ArithTraits<T>::is_complex) {
using value_type = typename T::value_type;
return T(quiet_NaN<value_type>(),
quiet_NaN<value_type>()); // Kokkos::complex ctor is not constexpr
} else {
return Kokkos::Experimental::quiet_NaN_v<T>;
}
}

} // namespace KokkosKernels::Impl

#endif // KOKKOSKERNELS_NAN_HPP
18 changes: 12 additions & 6 deletions sparse/impl/KokkosSparse_spmv_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -450,16 +450,19 @@ static void spmv_beta_transpose(const execution_space& exec,
const AMatrix& A, const XVector& x,
typename YVector::const_value_type& beta,
const YVector& y) {
using ordinal_type = typename AMatrix::non_const_ordinal_type;
using size_type = typename AMatrix::non_const_size_type;
using ordinal_type = typename AMatrix::non_const_ordinal_type;
using size_type = typename AMatrix::non_const_size_type;
using y_scalar_type = typename YVector::non_const_value_type;

if (A.numRows() <= static_cast<ordinal_type>(0)) {
return;
}

// We need to scale y first ("scaling" by zero just means filling
// with zeros), since the functor works by atomic-adding into y.
if (dobeta != 1) {
if (0 == dobeta || y_scalar_type(0) == beta) {
Kokkos::deep_copy(exec, y, y_scalar_type(0));
} else if (dobeta != 1) {
KokkosBlas::scal(exec, y, beta, y);
}

Expand Down Expand Up @@ -540,16 +543,19 @@ static void spmv_beta_transpose(const execution_space& exec,
const AMatrix& A, const XVector& x,
typename YVector::const_value_type& beta,
const YVector& y) {
using ordinal_type = typename AMatrix::non_const_ordinal_type;
using size_type = typename AMatrix::non_const_size_type;
using ordinal_type = typename AMatrix::non_const_ordinal_type;
using size_type = typename AMatrix::non_const_size_type;
using y_scalar_type = typename YVector::non_const_value_type;

if (A.numRows() <= static_cast<ordinal_type>(0)) {
return;
}

// We need to scale y first ("scaling" by zero just means filling
// with zeros), since the functor works by atomic-adding into y.
if (dobeta != 1) {
if (0 == dobeta || y_scalar_type(0) == beta) {
Kokkos::deep_copy(exec, y, y_scalar_type(0));
} else if (dobeta != 1) {
KokkosBlas::scal(exec, y, beta, y);
}

Expand Down
6 changes: 5 additions & 1 deletion sparse/impl/KokkosSparse_spmv_impl_merge.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -309,7 +309,11 @@ struct SpmvMergeHierarchical {
static_assert(XVector::rank == 1, "");
static_assert(YVector::rank == 1, "");

KokkosBlas::scal(y, beta, y);
if (y_value_type(0) == beta) {
Kokkos::deep_copy(space, y, y_value_type(0));
} else {
KokkosBlas::scal(space, y, beta, y);
}

/* determine launch parameters for different architectures
On architectures where there is a natural execution hierarchy with true
Expand Down
1 change: 1 addition & 0 deletions sparse/src/KokkosSparse_spmv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ inline constexpr bool spmv_general_tpl_avail() {
return spmv_mv_bsrmatrix_tpl_spec_avail<ExecutionSpace, Handle, AMatrix,
XVector, YVector>::value;
}
return false;
}
} // namespace Impl

Expand Down
76 changes: 57 additions & 19 deletions sparse/unit_test/Test_Sparse_spmv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <KokkosKernels_IOUtils.hpp>
#include <KokkosSparse_IOUtils.hpp>
#include <KokkosKernels_Utils.hpp>
#include <KokkosKernels_NaN.hpp>

#include "KokkosKernels_default_types.hpp"

Expand Down Expand Up @@ -86,7 +87,10 @@ struct fSPMV {
void operator()(const int i, value_type &err) const {
const mag_type error = AT::abs(expected_y(i) - y(i));

if (error > eps * max_val) {
// only one is NaN or error is too large
if ((Kokkos::isnan(AT::abs(expected_y(i))) ^
Kokkos::isnan(AT::abs(y(i)))) ||
(error > eps * max_val)) {
err++;
Kokkos::printf("expected_y(%d)=%f, y(%d)=%f err=%e, max_error=%e\n", i,
AT::abs(expected_y(i)), i, AT::abs(y(i)), error,
Expand Down Expand Up @@ -116,6 +120,7 @@ void sequential_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y,
using size_type_view_t = typename graph_t::row_map_type;
using lno_view_t = typename graph_t::entries_type;
using scalar_view_t = typename crsMat_t::values_type::non_const_type;
using y_scalar_t = typename y_vector_type::non_const_value_type;

using size_type = typename size_type_view_t::non_const_value_type;
using lno_t = typename lno_view_t::non_const_value_type;
Expand Down Expand Up @@ -145,7 +150,13 @@ void sequential_spmv(crsMat_t input_mat, x_vector_type x, y_vector_type y,
lno_t nr = input_mat.numRows();

// first, scale y by beta
for (size_t i = 0; i < h_y.extent(0); i++) h_y(i) *= beta;
for (size_t i = 0; i < h_y.extent(0); i++) {
if (beta == y_scalar_t(0)) {
h_y(i) = y_scalar_t(0);
} else {
h_y(i) *= beta;
}
}

// then go through the matrix and accumulate the matrix-vector product
for (lno_t row = 0; row < nr; ++row) {
Expand Down Expand Up @@ -184,17 +195,19 @@ void check_spmv(

const y_value_mag_type eps =
10 * Kokkos::ArithTraits<y_value_mag_type>::eps();
bool transposed = (mode == "T") || (mode == "H");
y_vector_type expected_y(
"expected", transposed ? input_mat.numCols() : input_mat.numRows());

y_vector_type actual_y("actual_y", y.extent(0));
y_vector_type expected_y("expected_y", y.extent(0));
Kokkos::deep_copy(expected_y, y);
Kokkos::deep_copy(actual_y, y);
Kokkos::fence();

sequential_spmv(input_mat, x, expected_y, alpha, beta, mode);
bool threw = false;
std::string msg;
try {
KokkosSparse::spmv(handle, mode.data(), alpha, input_mat, x, beta, y);
KokkosSparse::spmv(handle, mode.data(), alpha, input_mat, x, beta,
actual_y);
Kokkos::fence();
} catch (std::exception &e) {
threw = true;
Expand All @@ -203,11 +216,11 @@ void check_spmv(
ASSERT_FALSE(threw) << "KokkosSparse::Test::spmv 1D, mode " << mode
<< ": threw exception:\n"
<< msg << '\n';

int num_errors = 0;
Kokkos::parallel_reduce(
"KokkosSparse::Test::spmv", my_exec_space(0, y.extent(0)),
fSPMV<y_vector_type, y_vector_type>(expected_y, y, eps, max_val),
num_errors);
"KokkosSparse::Test::spmv", my_exec_space(0, actual_y.extent(0)),
fSPMV(expected_y, actual_y, eps, max_val), num_errors);
if (num_errors > 0)
printf("KokkosSparse::Test::spmv: %i errors of %i with params: %lf %lf\n",
num_errors, y.extent_int(0), y_value_trait::abs(alpha),
Expand Down Expand Up @@ -266,10 +279,9 @@ void check_spmv_mv(

auto y_spmv = Kokkos::subview(y, Kokkos::ALL(), i);
int num_errors = 0;
Kokkos::parallel_reduce(
"KokkosSparse::Test::spmv_mv", my_exec_space(0, y_i.extent(0)),
fSPMV<decltype(y_i), decltype(y_spmv)>(y_i, y_spmv, eps, max_val),
num_errors);
Kokkos::parallel_reduce("KokkosSparse::Test::spmv_mv",
my_exec_space(0, y_i.extent(0)),
fSPMV(y_i, y_spmv, eps, max_val), num_errors);
if (num_errors > 0)
std::cout << "KokkosSparse::Test::spmv_mv: " << num_errors
<< " errors of " << y_i.extent_int(0) << " for mv " << i
Expand Down Expand Up @@ -404,6 +416,7 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz,
using mag_t = typename Kokkos::ArithTraits<scalar_t>::mag_type;
using handle_t =
KokkosSparse::SPMVHandle<Device, crsMat_t, x_vector_type, y_vector_type>;
using y_policy = Kokkos::RangePolicy<typename y_vector_type::execution_space>;

constexpr mag_t max_x = static_cast<mag_t>(1);
constexpr mag_t max_y = static_cast<mag_t>(1);
Expand All @@ -419,18 +432,35 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz,
const lno_t max_nnz_per_row =
numRows ? (nnz / numRows + row_size_variance) : 0;

// Create vectors with and without nans
x_vector_type input_x("x", nc);
y_vector_type output_y("y", nr);
x_vector_type input_xt("x", nr);
y_vector_type output_yt("y", nc);
y_vector_type input_y("y", nr), input_y_nans("y_nans", nr);
y_vector_type input_yt("y", nc), input_yt_nans("y_nans", nc);

Kokkos::Random_XorShift64_Pool<typename Device::execution_space> rand_pool(
13718);

Kokkos::fill_random(input_x, rand_pool, randomUpperBound<scalar_t>(max_x));
Kokkos::fill_random(output_y, rand_pool, randomUpperBound<scalar_t>(max_y));
Kokkos::fill_random(input_y, rand_pool, randomUpperBound<scalar_t>(max_y));
Kokkos::fill_random(input_xt, rand_pool, randomUpperBound<scalar_t>(max_x));
Kokkos::fill_random(output_yt, rand_pool, randomUpperBound<scalar_t>(max_y));
Kokkos::fill_random(input_yt, rand_pool, randomUpperBound<scalar_t>(max_y));

// sprinkle in some nans
Kokkos::deep_copy(input_y_nans, input_y);
Kokkos::deep_copy(input_yt_nans, input_yt);
Kokkos::parallel_for(
y_policy(0, input_y_nans.extent(0)), KOKKOS_LAMBDA(const size_t i) {
if (0 == (i % 19)) {
input_y_nans(i) = KokkosKernels::Impl::quiet_NaN<scalar_t>();
}
});
Kokkos::parallel_for(
y_policy(0, input_yt_nans.extent(0)), KOKKOS_LAMBDA(const size_t i) {
if (0 == (i % 23)) {
input_yt_nans(i) = KokkosKernels::Impl::quiet_NaN<scalar_t>();
}
});

// We also need to bound the values
// in the matrix to bound the cancellations
Expand All @@ -457,8 +487,12 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz,
for (double beta : testAlphaBeta) {
mag_t max_error =
beta * max_y + alpha * max_nnz_per_row * max_val * max_x;
Test::check_spmv(&handle, input_mat, input_x, output_y, alpha, beta,
Test::check_spmv(&handle, input_mat, input_x, input_y, alpha, beta,
mode, max_error);
if (0 == beta) {
Test::check_spmv(&handle, input_mat, input_x, input_y_nans, alpha,
beta, mode, max_error);
}
}
}
}
Expand All @@ -468,8 +502,12 @@ void test_spmv(KokkosSparse::SPMVAlgorithm algo, lno_t numRows, size_type nnz,
// hoping the transpose won't have a long column...
mag_t max_error =
beta * max_y + alpha * max_nnz_per_row * max_val * max_x;
Test::check_spmv(&handle, input_mat, input_xt, output_yt, alpha, beta,
Test::check_spmv(&handle, input_mat, input_xt, input_yt, alpha, beta,
mode, max_error);
if (0 == beta) {
Test::check_spmv(&handle, input_mat, input_x, input_yt_nans, alpha,
beta, mode, max_error);
}
}
}
}
Expand Down
Loading

0 comments on commit 3414c91

Please sign in to comment.