Skip to content

Commit

Permalink
RBILUK: Use new KK::sptrsv block support instead of KK::trsv
Browse files Browse the repository at this point in the history
Signed-off-by: James Foucar <[email protected]>
  • Loading branch information
jgfouca committed Oct 18, 2024
1 parent 1beef8c commit f08be9c
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 32 deletions.
2 changes: 2 additions & 0 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_decl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,8 @@ class RBILUK : virtual public Ifpack2::RILUK< Tpetra::RowMatrix< typename Matrix

//! The inverse of the diagonal
Teuchos::RCP<block_crs_matrix_type> D_block_inverse_;

Kokkos::View<scalar_type*, typename values_device_view_type::device_type> tmp_;
};


Expand Down
90 changes: 58 additions & 32 deletions packages/ifpack2/src/Ifpack2_Experimental_RBILUK_def.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#include "Ifpack2_LocalFilter.hpp"
#include "Ifpack2_Utilities.hpp"
#include "Ifpack2_RILUK.hpp"
#include "KokkosSparse_trsv.hpp"
#include "KokkosSparse_sptrsv.hpp"

//#define IFPACK2_RBILUK_INITIAL
//#define IFPACK2_RBILUK_INITIAL_NOKK
Expand Down Expand Up @@ -194,6 +194,11 @@ void RBILUK<MatrixType>::allocate_L_and_U_blocks ()
U_block_->setAllToScalar (STM::zero ());
D_block_->setAllToScalar (STM::zero ());

// Allocate temp space for apply
if (this->isKokkosKernelsSpiluk_) {
const auto numRows = L_block_->getLocalNumRows();
tmp_ = decltype(tmp_)("RBILUK::tmp_", numRows * blockSize_);
}
}
this->isAllocated_ = true;
}
Expand Down Expand Up @@ -1070,7 +1075,7 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
else { // Solve U^P (D^P (L^P Y)) = X for Y (where P is * or T).
TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm without KokkosKernels. ");
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
}
}
else { // alpha != 1 or beta != 0
Expand All @@ -1088,42 +1093,63 @@ apply (const Tpetra::MultiVector<scalar_type,local_ordinal_type,global_ordinal_t
}
}
else {
// Kokkos kernels impl. For now, the only block trsv available is Sequential
// and must be done on host.
using row_map_type = typename local_matrix_host_type::row_map_type;
using index_type = typename local_matrix_host_type::index_type;
using values_type = typename local_matrix_host_type::values_type;

auto X_view = X.getLocalViewHost(Tpetra::Access::ReadOnly);
auto Y_view = Y.getLocalViewHost(Tpetra::Access::ReadWrite);

auto L_row_ptrs_host = L_block_->getCrsGraph().getLocalRowPtrsHost();
auto L_entries_host = L_block_->getCrsGraph().getLocalIndicesHost();
auto U_row_ptrs_host = U_block_->getCrsGraph().getLocalRowPtrsHost();
auto U_entries_host = U_block_->getCrsGraph().getLocalIndicesHost();
auto L_values_host = L_block_->getValuesHost();
auto U_values_host = U_block_->getValuesHost();

row_map_type* L_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&L_row_ptrs_host);
index_type* L_entries_host_ri = reinterpret_cast<index_type*>(&L_entries_host);
row_map_type* U_row_ptrs_host_ri = reinterpret_cast<row_map_type*>(&U_row_ptrs_host);
index_type* U_entries_host_ri = reinterpret_cast<index_type*>(&U_entries_host);
values_type* L_values_host_ri = reinterpret_cast<values_type*>(&L_values_host);
values_type* U_values_host_ri = reinterpret_cast<values_type*>(&U_values_host);
// Kokkos kernels impl.
auto X_views = X.getLocalViewDevice(Tpetra::Access::ReadOnly);
auto Y_views = Y.getLocalViewDevice(Tpetra::Access::ReadWrite);

auto lclL = L_block_->getLocalMatrixDevice();
auto L_rowmap = lclL.graph.row_map;
auto L_entries = lclL.graph.entries;
auto L_values = lclL.values;

auto lclU = U_block_->getLocalMatrixDevice();
auto U_rowmap = lclU.graph.row_map;
auto U_entries = lclU.graph.entries;
auto U_values = lclU.values;

const auto numRows = L_block_->getLocalNumRows();
local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries_host.size(), *L_values_host_ri, *L_row_ptrs_host_ri, *L_entries_host_ri, blockSize_);
local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries_host.size(), *U_values_host_ri, *U_row_ptrs_host_ri, *U_entries_host_ri, blockSize_);
local_matrix_host_type L_block_local_host("L_block_local_host", numRows, numRows, L_entries.size(), L_values, L_rowmap, L_entries, blockSize_);
local_matrix_host_type U_block_local_host("U_block_local_host", numRows, numRows, U_entries.size(), U_values, U_rowmap, U_entries, blockSize_);

if (mode == Teuchos::NO_TRANS) {
KokkosSparse::trsv("L", "N", "N", L_block_local_host, X_view, Y_view);
KokkosSparse::trsv("U", "N", "N", U_block_local_host, Y_view, Y_view);
KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
KokkosSparse::Experimental::SPTRSVAlgorithm alg = KokkosSparse::Experimental::SPTRSVAlgorithm::SEQLVLSCHD_RP;
{
KernelHandle_->create_sptrsv_handle(alg, numRows, true /*lower*/, blockSize_);
KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values);
Kokkos::fence();

const LO numVecs = X.getNumVectors();
for (LO vec = 0; vec < numVecs; ++vec) {
auto X_view = Kokkos::subview(X_views, Kokkos::ALL(), vec);
auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), L_rowmap, L_entries, L_values, X_view, tmp_);
}
Kokkos::fence();

KernelHandle_->destroy_sptrsv_handle();
}

{
KernelHandle_->create_sptrsv_handle(alg, numRows, false /*upper*/, blockSize_);
KokkosSparse::Experimental::sptrsv_symbolic(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values);
Kokkos::fence();

const LO numVecs = X.getNumVectors();
for (LO vec = 0; vec < numVecs; ++vec) {
auto Y_view = Kokkos::subview(Y_views, Kokkos::ALL(), vec);
KokkosSparse::Experimental::sptrsv_solve(KernelHandle_.getRawPtr(), U_rowmap, U_entries, U_values, tmp_, Y_view);
}
Kokkos::fence();

KernelHandle_->destroy_sptrsv_handle();
}

KokkosBlas::axpby(alpha, Y_views, beta, Y_views);
}
else {
KokkosSparse::trsv("U", "T", "N", U_block_local_host, X_view, Y_view);
KokkosSparse::trsv("L", "T", "N", L_block_local_host, Y_view, Y_view);
KokkosBlas::axpby(alpha, Y_view, beta, Y_view);
TEUCHOS_TEST_FOR_EXCEPTION(
true, std::runtime_error,
"Ifpack2::Experimental::RBILUK::apply: transpose apply is not implemented for the block algorithm");
}

//Y.getWrappedDualView().sync();
Expand Down

0 comments on commit f08be9c

Please sign in to comment.