diff --git a/CMakeLists.txt b/CMakeLists.txt index 705448b83..00eb16da9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -100,7 +100,8 @@ endif() if(ENABLE_MKLCPU_BACKEND OR ENABLE_MKLGPU_BACKEND OR ENABLE_CUSOLVER_BACKEND - OR ENABLE_ROCSOLVER_BACKEND) + OR ENABLE_ROCSOLVER_BACKEND + OR ENABLE_ARMPL_BACKEND) list(APPEND DOMAINS_LIST "lapack") endif() if(ENABLE_MKLCPU_BACKEND diff --git a/README.md b/README.md index 528e8081e..d5cffe813 100644 --- a/README.md +++ b/README.md @@ -251,12 +251,18 @@ Supported compilers include: Dynamic, Static - LAPACK + LAPACK x86 CPU Intel(R) oneMKL Intel DPC++ Dynamic, Static + + aarch64 CPU + Arm Performance Libraries + Open DPC++
AdaptiveCpp + Dynamic, Static + Intel GPU Intel(R) oneMKL diff --git a/include/oneapi/math/detail/backends_table.hpp b/include/oneapi/math/detail/backends_table.hpp index 48e5f98c6..0ecb43cd0 100644 --- a/include/oneapi/math/detail/backends_table.hpp +++ b/include/oneapi/math/detail/backends_table.hpp @@ -151,6 +151,13 @@ static std::map>> libraries = LIB_NAME("lapack_mklcpu") #endif } }, + { device::aarch64cpu, + { +#ifdef ONEMATH_ENABLE_ARMPL_BACKEND + LIB_NAME("lapack_armpl"), +#endif + } }, + { device::intelgpu, { #ifdef ONEMATH_ENABLE_MKLGPU_BACKEND diff --git a/include/oneapi/math/lapack.hpp b/include/oneapi/math/lapack.hpp index c862513c4..56202c301 100644 --- a/include/oneapi/math/lapack.hpp +++ b/include/oneapi/math/lapack.hpp @@ -33,5 +33,8 @@ #ifdef ONEMATH_ENABLE_ROCSOLVER_BACKEND #include "oneapi/math/lapack/detail/rocsolver/lapack_ct.hpp" #endif +#ifdef ONEMATH_ENABLE_ARMPL_BACKEND +#include "oneapi/math/lapack/detail/armpl/lapack_ct.hpp" +#endif #include "oneapi/math/lapack/detail/lapack_rt.hpp" diff --git a/include/oneapi/math/lapack/detail/armpl/lapack_ct.hpp b/include/oneapi/math/lapack/detail/armpl/lapack_ct.hpp new file mode 100644 index 000000000..eb2a08b56 --- /dev/null +++ b/include/oneapi/math/lapack/detail/armpl/lapack_ct.hpp @@ -0,0 +1,43 @@ +/******************************************************************************* +* Copyright 2025 SiPearl +* Copyright 2021 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#pragma once + +#include +#include + +#include + +#include "oneapi/math/types.hpp" +#include "oneapi/math/lapack/types.hpp" +#include "oneapi/math/detail/backend_selector.hpp" +#include "oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hpp" + +namespace oneapi { +namespace math { +namespace lapack { + +#define LAPACK_BACKEND armpl +#include "oneapi/math/lapack/detail/armpl/lapack_ct.hxx" +#undef LAPACK_BACKEND + +} //namespace lapack +} //namespace math +} //namespace oneapi diff --git a/include/oneapi/math/lapack/detail/armpl/lapack_ct.hxx b/include/oneapi/math/lapack/detail/armpl/lapack_ct.hxx new file mode 100644 index 000000000..8442f1221 --- /dev/null +++ b/include/oneapi/math/lapack/detail/armpl/lapack_ct.hxx @@ -0,0 +1,2588 @@ +/******************************************************************************* +* Copyright 2025 SiPearl +* Copyright 2021 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. 
+* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +// Buffer APIs + +static inline void gebrd(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); +} +static inline void gebrd(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); +} +static inline void gebrd(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, + sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); +} +static inline void gebrd(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, + sycl::buffer>& taup, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size); +} +static inline void gerqf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + 
scratchpad_size); +} +static inline void gerqf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void gerqf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void gerqf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void geqrf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void geqrf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void geqrf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void geqrf(backend_selector selector, std::int64_t m, 
std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void getrf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getrf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getrf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getrf(backend_selector selector, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getri(backend_selector selector, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getri(backend_selector selector, std::int64_t n, + sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + 
std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getri(backend_selector selector, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getri(backend_selector selector, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void getrs(backend_selector selector, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); +} +static inline void getrs(backend_selector selector, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); +} +static inline void 
getrs(backend_selector selector, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, ldb, + scratchpad, scratchpad_size); +} +static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, + vt, ldvt, scratchpad, scratchpad_size); +} +static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& s, + sycl::buffer& u, std::int64_t ldu, sycl::buffer& vt, + std::int64_t ldvt, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, + vt, ldvt, scratchpad, scratchpad_size); +} +static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, + vt, ldvt, scratchpad, scratchpad_size); +} +static inline void gesvd(backend_selector selector, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, 
+ sycl::buffer& s, sycl::buffer>& u, + std::int64_t ldu, sycl::buffer>& vt, + std::int64_t ldvt, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, ldu, + vt, ldvt, scratchpad, scratchpad_size); +} +static inline void heevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); +} +static inline void heevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); +} +static inline void hegvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, + w, scratchpad, scratchpad_size); +} +static inline void hegvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer& w, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, + w, scratchpad, scratchpad_size); +} +static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, 
std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); +} +static inline void hetrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); +} +static inline void hetrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void hetrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void orgbr(backend_selector selector, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void orgbr(backend_selector selector, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgbr(selector.get_queue(), vec, m, n, k, a, lda, 
tau, scratchpad, + scratchpad_size); +} +static inline void orgqr(backend_selector selector, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void orgqr(backend_selector selector, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void orgtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void ormtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); +} +static inline void ormtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, 
sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); +} +static inline void ormrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void ormrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void ormqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void ormqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + 
oneapi::math::lapack::armpl::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potri(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potri(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potri(backend_selector 
selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potri(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size); +} +static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, + scratchpad, scratchpad_size); +} +static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, + scratchpad, scratchpad_size); +} +static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, + scratchpad, scratchpad_size); +} +static inline void potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + 
oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, + scratchpad, scratchpad_size); +} +static inline void syevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); +} +static inline void syevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, scratchpad, + scratchpad_size); +} +static inline void sygvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, + w, scratchpad, scratchpad_size); +} +static inline void sygvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, ldb, + w, scratchpad, scratchpad_size); +} +static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sytrd(selector.get_queue(), uplo, n, 
a, lda, d, e, tau, scratchpad, + scratchpad_size); +} +static inline void sytrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& d, sycl::buffer& e, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, scratchpad, + scratchpad_size); +} +static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, scratchpad, + scratchpad_size); +} +static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + 
sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); +} +static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); +} +static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); +} +static inline void trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, lda, b, + ldb, scratchpad, scratchpad_size); +} +static inline void ungbr(backend_selector selector, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void ungbr(backend_selector selector, oneapi::math::generate vec, + 
std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void ungqr(backend_selector selector, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void ungqr(backend_selector selector, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungqr(selector.get_queue(), m, n, k, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void ungtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungtr(selector.get_queue(), uplo, n, a, lda, tau, scratchpad, + scratchpad_size); +} +static inline void unmrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + 
oneapi::math::lapack::armpl::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void unmrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void unmqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void unmqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, tau, c, + ldc, scratchpad, scratchpad_size); +} +static inline void unmtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); +} +static inline void 
unmtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, tau, + c, ldc, scratchpad, scratchpad_size); +} +static inline void geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} +static inline void geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} +static inline void geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} +static inline void geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, + 
sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} +static inline void getri_batch(backend_selector selector, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void getri_batch(backend_selector selector, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void getri_batch(backend_selector selector, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void getri_batch(backend_selector selector, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void getrs_batch(backend_selector selector, + oneapi::math::transpose 
trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); +} +static inline void getrs_batch(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); +} +static inline void getrs_batch(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); +} +static inline void getrs_batch(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t 
scratchpad_size) { + oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, stride_a, + ipiv, stride_ipiv, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size); +} +static inline void getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, scratchpad_size); +} +static inline void 
orgqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} +static inline void orgqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} +static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); +} +static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); +} +static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, 
stride_a, + batch_size, scratchpad, scratchpad_size); +} +static inline void potrf_batch(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size); +} +static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, + b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); +} +static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, + b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); +} +static inline void potrs_batch(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, + b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); +} +static inline void potrs_batch(backend_selector selector, 
oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, stride_a, + b, ldb, stride_b, batch_size, scratchpad, + scratchpad_size); +} +static inline void ungqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} +static inline void ungqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer>& tau, + std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + oneapi::math::lapack::armpl::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, tau, + stride_tau, batch_size, scratchpad, scratchpad_size); +} + +// USM APIs + +static inline sycl::event gebrd(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tauq, std::complex* taup, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event gebrd(backend_selector selector, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tauq, double* taup, 
double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event gebrd(backend_selector selector, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tauq, float* taup, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event gebrd(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gebrd(selector.get_queue(), m, n, a, lda, d, e, tauq, taup, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event gerqf(backend_selector selector, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event gerqf(backend_selector selector, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event gerqf(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event gerqf(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gerqf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf(backend_selector selector, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf(backend_selector selector, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::geqrf(selector.get_queue(), m, n, a, lda, tau, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf(backend_selector selector, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf(backend_selector selector, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf(selector.get_queue(), m, n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri(backend_selector selector, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size, 
dependencies); +} +static inline sycl::event getri(backend_selector selector, std::int64_t n, + double* a, std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri(backend_selector selector, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri(backend_selector selector, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri(selector.get_queue(), n, a, lda, ipiv, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size, 
dependencies); +} +static inline sycl::event getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs(selector.get_queue(), trans, n, nrhs, a, lda, ipiv, b, + ldb, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event gesvd(backend_selector selector, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* s, double* u, std::int64_t ldu, double* vt, + std::int64_t ldvt, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event gesvd(backend_selector selector, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* s, float* u, std::int64_t ldu, float* vt, std::int64_t ldvt, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event 
gesvd(backend_selector selector, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event gesvd(backend_selector selector, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* s, std::complex* u, + std::int64_t ldu, std::complex* vt, std::int64_t ldvt, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::gesvd(selector.get_queue(), jobu, jobvt, m, n, a, lda, s, u, + ldu, vt, ldvt, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event heevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::heevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, 
oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, float* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event hegvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, double* w, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::hegvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event hetrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* d, + float* e, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event hetrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + double* d, double* e, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::hetrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event hetrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event hetrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::hetrf(selector.get_queue(), uplo, n, a, lda, ipiv, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event orgbr(backend_selector selector, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event orgbr(backend_selector selector, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgbr(selector.get_queue(), vec, m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event orgqr(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgqr(selector.get_queue(), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event orgqr(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::orgqr(selector.get_queue(), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event orgtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* tau, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgtr(selector.get_queue(), uplo, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event orgtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* tau, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgtr(selector.get_queue(), uplo, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event ormtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ormtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event ormrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose 
trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event ormrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ormrq(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* c, + std::int64_t ldc, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event ormqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* c, + std::int64_t ldc, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ormqr(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* 
scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potri(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potri(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + 
scratchpad_size, dependencies); +} +static inline sycl::event potri(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potri(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potri(selector.get_queue(), uplo, n, a, lda, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, 
b, ldb, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event potrs(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs(selector.get_queue(), uplo, n, nrhs, a, lda, b, ldb, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event syevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event syevd(backend_selector selector, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, + float* w, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::syevd(selector.get_queue(), jobz, uplo, n, a, lda, w, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* w, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sygvd(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* w, + float* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sygvd(selector.get_queue(), itype, jobz, uplo, n, a, lda, b, + ldb, w, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sytrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* d, double* e, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sytrd(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* d, float* e, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sytrd(selector.get_queue(), uplo, n, a, lda, d, e, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, std::int64_t* ipiv, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, std::int64_t* ipiv, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + 
std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event sytrf(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::sytrf(selector.get_queue(), uplo, n, a, lda, ipiv, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, double* a, std::int64_t lda, + double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, float* a, std::int64_t lda, + float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event trtrs(backend_selector selector, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::trtrs(selector.get_queue(), uplo, trans, diag, n, nrhs, a, + lda, b, ldb, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event ungbr(backend_selector selector, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungbr(backend_selector selector, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungbr(selector.get_queue(), vec, m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungqr(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungqr(selector.get_queue(), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungqr(backend_selector 
selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungqr(selector.get_queue(), m, n, k, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungtr(selector.get_queue(), uplo, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event ungtr(backend_selector selector, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungtr(selector.get_queue(), uplo, n, a, lda, tau, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event unmrq(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::unmrq(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event unmqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event unmqr(backend_selector selector, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::unmqr(selector.get_queue(), side, trans, m, n, k, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event unmtr(backend_selector selector, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, 
+ std::complex* c, std::int64_t ldc, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::unmtr(selector.get_queue(), side, uplo, trans, m, n, a, lda, + tau, c, ldc, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, 
std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, float** tau, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, double** tau, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event geqrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::complex** tau, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::geqrf_batch(selector.get_queue(), m, n, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, stride_a, + ipiv, stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getrf_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::getrf_batch(selector.get_queue(), m, n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, + float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, + double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, stride_a, ipiv, + stride_ipiv, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + float** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + double** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event getri_batch(backend_selector selector, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getri_batch(selector.get_queue(), n, a, lda, ipiv, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} 
+static inline sycl::event getrs_batch(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch( + selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs_batch(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch( + selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs_batch( + backend_selector selector, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch( + selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs_batch( + backend_selector selector, oneapi::math::transpose trans, std::int64_t n, + std::int64_t 
nrhs, std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch( + selector.get_queue(), trans, n, nrhs, a, lda, stride_a, ipiv, stride_ipiv, b, ldb, stride_b, + batch_size, scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs_batch(backend_selector selector, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, + std::int64_t** ipiv, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs_batch(backend_selector selector, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, + std::int64_t** ipiv, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs_batch(backend_selector selector, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& 
dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event getrs_batch( + backend_selector selector, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::getrs_batch(selector.get_queue(), trans, n, nrhs, a, lda, + ipiv, b, ldb, group_count, group_sizes, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + std::int64_t stride_a, float* tau, std::int64_t stride_tau, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + std::int64_t stride_a, double* tau, std::int64_t stride_tau, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + 
std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event orgqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::orgqr_batch(selector.get_queue(), m, n, k, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, stride_a, + batch_size, scratchpad, scratchpad_size, + dependencies); +} +static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} 
+static inline sycl::event potrf_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrf_batch(selector.get_queue(), uplo, n, a, lda, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, std::int64_t stride_a, float* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, std::int64_t stride_a, double* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return 
oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, + stride_a, b, ldb, stride_b, batch_size, + scratchpad, scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + float** a, std::int64_t* lda, float** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + double** a, std::int64_t* lda, double** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + 
std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event potrs_batch(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, + std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::potrs_batch(selector.get_queue(), uplo, n, nrhs, a, lda, b, + ldb, group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungqr_batch(selector.get_queue(), m, n, k, a, lda, stride_a, + tau, stride_tau, batch_size, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector 
selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} +static inline sycl::event ungqr_batch(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::complex** a, + std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}) { + return oneapi::math::lapack::armpl::ungqr_batch(selector.get_queue(), m, n, k, a, lda, tau, + group_count, group_sizes, scratchpad, + scratchpad_size, dependencies); +} + +// SCRATCHPAD APIs +template +std::int64_t gebrd_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::gebrd_scratchpad_size(selector.get_queue(), m, n, + lda); +} +template +std::int64_t gerqf_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::gerqf_scratchpad_size(selector.get_queue(), m, n, + lda); +} +template +std::int64_t geqrf_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::geqrf_scratchpad_size(selector.get_queue(), m, n, + lda); +} +template +std::int64_t gesvd_scratchpad_size(backend_selector selector, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::int64_t lda, + std::int64_t ldu, std::int64_t ldvt) { + return oneapi::math::lapack::armpl::gesvd_scratchpad_size(selector.get_queue(), jobu, + jobvt, m, n, lda, ldu, ldvt); +} 
+template +std::int64_t getrf_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::getrf_scratchpad_size(selector.get_queue(), m, n, + lda); +} +template +std::int64_t getri_scratchpad_size(backend_selector selector, std::int64_t n, + std::int64_t lda) { + return oneapi::math::lapack::armpl::getri_scratchpad_size(selector.get_queue(), n, + lda); +} +template +std::int64_t getrs_scratchpad_size(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb) { + return oneapi::math::lapack::armpl::getrs_scratchpad_size(selector.get_queue(), trans, + n, nrhs, lda, ldb); +} +template +std::int64_t heevd_scratchpad_size(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { + return oneapi::math::lapack::armpl::heevd_scratchpad_size(selector.get_queue(), jobz, + uplo, n, lda); +} +template +std::int64_t hegvd_scratchpad_size(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda, std::int64_t ldb) { + return oneapi::math::lapack::armpl::hegvd_scratchpad_size(selector.get_queue(), itype, + jobz, uplo, n, lda, ldb); +} +template +std::int64_t hetrd_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::hetrd_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t hetrf_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::hetrf_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t orgbr_scratchpad_size(backend_selector selector, + oneapi::math::generate vect, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda) { + return 
oneapi::math::lapack::armpl::orgbr_scratchpad_size(selector.get_queue(), vect, + m, n, k, lda); +} +template +std::int64_t orgtr_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::orgtr_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t orgqr_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda) { + return oneapi::math::lapack::armpl::orgqr_scratchpad_size(selector.get_queue(), m, n, + k, lda); +} +template +std::int64_t ormrq_scratchpad_size(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc) { + return oneapi::math::lapack::armpl::ormrq_scratchpad_size(selector.get_queue(), side, + trans, m, n, k, lda, ldc); +} +template +std::int64_t ormqr_scratchpad_size(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc) { + return oneapi::math::lapack::armpl::ormqr_scratchpad_size(selector.get_queue(), side, + trans, m, n, k, lda, ldc); +} +template +std::int64_t ormtr_scratchpad_size(backend_selector selector, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc) { + return oneapi::math::lapack::armpl::ormtr_scratchpad_size(selector.get_queue(), side, + uplo, trans, m, n, lda, ldc); +} +template +std::int64_t potrf_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::potrf_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t potrs_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + 
std::int64_t lda, std::int64_t ldb) { + return oneapi::math::lapack::armpl::potrs_scratchpad_size(selector.get_queue(), uplo, + n, nrhs, lda, ldb); +} +template +std::int64_t potri_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::potri_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t sytrf_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::sytrf_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t syevd_scratchpad_size(backend_selector selector, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda) { + return oneapi::math::lapack::armpl::syevd_scratchpad_size(selector.get_queue(), jobz, + uplo, n, lda); +} +template +std::int64_t sygvd_scratchpad_size(backend_selector selector, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda, std::int64_t ldb) { + return oneapi::math::lapack::armpl::sygvd_scratchpad_size(selector.get_queue(), itype, + jobz, uplo, n, lda, ldb); +} +template +std::int64_t sytrd_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::sytrd_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t trtrs_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t ldb) { + return oneapi::math::lapack::armpl::trtrs_scratchpad_size( + selector.get_queue(), uplo, trans, diag, n, nrhs, lda, ldb); +} +template +std::int64_t ungbr_scratchpad_size(backend_selector selector, + oneapi::math::generate vect, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda) { + return 
oneapi::math::lapack::armpl::ungbr_scratchpad_size(selector.get_queue(), vect, + m, n, k, lda); +} +template +std::int64_t ungqr_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda) { + return oneapi::math::lapack::armpl::ungqr_scratchpad_size(selector.get_queue(), m, n, + k, lda); +} +template +std::int64_t ungtr_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda) { + return oneapi::math::lapack::armpl::ungtr_scratchpad_size(selector.get_queue(), uplo, + n, lda); +} +template +std::int64_t unmrq_scratchpad_size(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc) { + return oneapi::math::lapack::armpl::unmrq_scratchpad_size(selector.get_queue(), side, + trans, m, n, k, lda, ldc); +} +template +std::int64_t unmqr_scratchpad_size(backend_selector selector, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc) { + return oneapi::math::lapack::armpl::unmqr_scratchpad_size(selector.get_queue(), side, + trans, m, n, k, lda, ldc); +} +template +std::int64_t unmtr_scratchpad_size(backend_selector selector, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc) { + return oneapi::math::lapack::armpl::unmtr_scratchpad_size(selector.get_queue(), side, + uplo, trans, m, n, lda, ldc); +} +template +std::int64_t getrf_batch_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, std::int64_t batch_size) { + return oneapi::math::lapack::armpl::getrf_batch_scratchpad_size( + selector.get_queue(), m, n, lda, stride_a, stride_ipiv, batch_size); +} +template +std::int64_t 
getri_batch_scratchpad_size(backend_selector selector, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, std::int64_t batch_size) { + return oneapi::math::lapack::armpl::getri_batch_scratchpad_size( + selector.get_queue(), n, lda, stride_a, stride_ipiv, batch_size); +} +template +std::int64_t getrs_batch_scratchpad_size(backend_selector selector, + oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { + return oneapi::math::lapack::armpl::getrs_batch_scratchpad_size( + selector.get_queue(), trans, n, nrhs, lda, stride_a, stride_ipiv, ldb, stride_b, + batch_size); +} +template +std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, std::int64_t batch_size) { + return oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size( + selector.get_queue(), m, n, lda, stride_a, stride_tau, batch_size); +} +template +std::int64_t potrf_batch_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size) { + return oneapi::math::lapack::armpl::potrf_batch_scratchpad_size( + selector.get_queue(), uplo, n, lda, stride_a, batch_size); +} +template +std::int64_t potrs_batch_scratchpad_size(backend_selector selector, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { + return oneapi::math::lapack::armpl::potrs_batch_scratchpad_size( + selector.get_queue(), uplo, n, nrhs, lda, stride_a, ldb, stride_b, batch_size); +} +template +std::int64_t orgqr_batch_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + 
std::int64_t stride_a, std::int64_t stride_tau, + std::int64_t batch_size) { + return oneapi::math::lapack::armpl::orgqr_batch_scratchpad_size( + selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size); +} +template +std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_tau, + std::int64_t batch_size) { + return oneapi::math::lapack::armpl::ungqr_batch_scratchpad_size( + selector.get_queue(), m, n, k, lda, stride_a, stride_tau, batch_size); +} +template +std::int64_t getrf_batch_scratchpad_size(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::getrf_batch_scratchpad_size( + selector.get_queue(), m, n, lda, group_count, group_sizes); +} +template +std::int64_t getri_batch_scratchpad_size(backend_selector selector, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::getri_batch_scratchpad_size( + selector.get_queue(), n, lda, group_count, group_sizes); +} +template +std::int64_t getrs_batch_scratchpad_size(backend_selector selector, + oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::getrs_batch_scratchpad_size( + selector.get_queue(), trans, n, nrhs, lda, ldb, group_count, group_sizes); +} +template +std::int64_t geqrf_batch_scratchpad_size(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size( + selector.get_queue(), m, n, lda, group_count, group_sizes); +} +template +std::int64_t 
orgqr_batch_scratchpad_size(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::orgqr_batch_scratchpad_size( + selector.get_queue(), m, n, k, lda, group_count, group_sizes); +} +template +std::int64_t potrf_batch_scratchpad_size(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::potrf_batch_scratchpad_size( + selector.get_queue(), uplo, n, lda, group_count, group_sizes); +} +template +std::int64_t potrs_batch_scratchpad_size(backend_selector selector, + oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::potrs_batch_scratchpad_size( + selector.get_queue(), uplo, n, nrhs, lda, ldb, group_count, group_sizes); +} +template +std::int64_t ungqr_batch_scratchpad_size(backend_selector selector, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes) { + return oneapi::math::lapack::armpl::ungqr_batch_scratchpad_size( + selector.get_queue(), m, n, k, lda, group_count, group_sizes); +} diff --git a/include/oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hpp b/include/oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hpp new file mode 100644 index 000000000..2cb51847a --- /dev/null +++ b/include/oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hpp @@ -0,0 +1,38 @@ +/******************************************************************************* +* Copyright 2025 SiPearl +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#pragma once + +#include + +#include +#include + +#include "oneapi/math/types.hpp" +namespace oneapi { +namespace math { +namespace lapack { +namespace armpl { + +#include "onemath_lapack_armpl.hxx" + +} // namespace armpl +} // namespace lapack +} // namespace math +} // namespace oneapi diff --git a/include/oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hxx b/include/oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hxx new file mode 100644 index 000000000..d9b990ef6 --- /dev/null +++ b/include/oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hxx @@ -0,0 +1,1523 @@ +// Buffer APIs + +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer>& tauq, sycl::buffer>& taup, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tauq, sycl::buffer& taup, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + 
sycl::buffer& e, sycl::buffer>& tauq, + sycl::buffer>& taup, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getrf(sycl::queue& queue, std::int64_t 
m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getri(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& b, + std::int64_t ldb, sycl::buffer>& scratchpad, + std::int64_t 
scratchpad_size); + +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer& s, sycl::buffer>& u, std::int64_t ldu, + sycl::buffer>& vt, std::int64_t ldvt, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void hegvd(sycl::queue& queue, 
std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, sycl::buffer& w, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer>& tau, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t 
n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& c, 
std::int64_t ldc, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + 
sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& w, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb, sycl::buffer& w, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, + sycl::buffer& tau, sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void sytrf(sycl::queue& 
queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& b, std::int64_t ldb, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t 
m, std::int64_t n, + std::int64_t k, sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer>& a, + 
std::int64_t lda, sycl::buffer>& tau, + sycl::buffer>& c, std::int64_t ldc, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, + sycl::buffer>& tau, sycl::buffer>& c, + std::int64_t ldc, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, 
sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size); + +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, 
sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size); + +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, 
std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size); + +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + 
std::int64_t scratchpad_size); + +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size); + +// USM APIs + +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, float* d, float* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* d, double* e, double* tauq, double* taup, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* d, float* e, float* tauq, float* taup, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, double* d, double* e, std::complex* tauq, + std::complex* taup, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}); + +sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t* ipiv, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t* ipiv, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri(sycl::queue& queue, std::int64_t n, std::complex* a, std::int64_t lda, + std::int64_t* ipiv, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t* ipiv, double* b, + std::int64_t ldb, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t* ipiv, float* b, + std::int64_t ldb, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs(sycl::queue& queue, oneapi::math::transpose 
trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* b, std::int64_t ldb, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, double* a, std::int64_t lda, double* s, double* u, + std::int64_t ldu, double* vt, std::int64_t ldvt, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, float* a, std::int64_t lda, float* s, float* u, + std::int64_t ldu, float* vt, std::int64_t ldvt, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + float* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::complex* a, std::int64_t lda, + double* s, std::complex* u, std::int64_t ldu, std::complex* vt, + std::int64_t ldvt, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, std::complex* a, std::int64_t lda, double* w, + std::complex* 
scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, std::int64_t lda, + std::complex* b, std::int64_t ldb, float* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::complex* a, + std::int64_t lda, std::complex* b, std::int64_t ldb, double* w, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, float* d, float* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, double* d, double* e, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, float* a, std::int64_t lda, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event orgbr(sycl::queue& queue, 
oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, double* a, std::int64_t lda, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const 
std::vector& dependencies = {}); + +sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, double* a, std::int64_t lda, + double* tau, double* c, std::int64_t ldc, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, float* a, std::int64_t lda, + float* tau, float* c, std::int64_t ldc, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = 
{}); + +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + float* a, std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + double* a, std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, + std::int64_t n, double* a, std::int64_t lda, double* w, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, 
oneapi::math::uplo uplo, + std::int64_t n, float* a, std::int64_t lda, float* w, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, double* a, std::int64_t lda, double* b, + std::int64_t ldb, double* w, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, float* a, std::int64_t lda, float* b, + std::int64_t ldb, float* w, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, double* d, double* e, double* tau, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, float* d, float* e, float* tau, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t* ipiv, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t* ipiv, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + 
std::complex* a, std::int64_t lda, std::int64_t* ipiv, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, double* a, + std::int64_t lda, double* b, std::int64_t ldb, double* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, float* a, + std::int64_t lda, float* b, std::int64_t ldb, float* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, + std::complex* a, std::int64_t lda, std::complex* b, + std::int64_t ldb, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::complex* a, std::int64_t lda, + std::complex* tau, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event 
ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event unmqr(sycl::queue& queue, 
oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::complex* a, + std::int64_t lda, std::complex* tau, std::complex* c, + std::int64_t ldc, std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::complex* a, std::int64_t lda, std::complex* tau, + std::complex* c, std::int64_t ldc, std::complex* scratchpad, + std::int64_t scratchpad_size, const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, 
std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t 
n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event 
getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, 
oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* 
nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* 
lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t 
scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, float** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const 
std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, double** b, + std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::complex** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); 
+ +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies = {}); + +// SCRATCHPAD APIs + +/* Declarations only: every *_scratchpad_size query below takes the queue plus the problem dimensions and returns a std::int64_t. NOTE(review): each bare `template` keyword has no parameter list here; the angle-bracket part appears to have been lost in extraction -- confirm against the real header before relying on these signatures. */ template +std::int64_t gebrd_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda); + +template +std::int64_t gerqf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda); + +template +std::int64_t geqrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda); + +template +std::int64_t gesvd_scratchpad_size(sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldu, std::int64_t ldvt); + +template +std::int64_t getrf_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda); + +template +std::int64_t getri_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda); + +template +std::int64_t getrs_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); + +template +std::int64_t heevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); + +template +std::int64_t hegvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb); + +template +std::int64_t hetrd_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t hetrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t orgbr_scratchpad_size(sycl::queue& queue, 
oneapi::math::generate vect, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda); + +template +std::int64_t orgtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t orgqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda); + +template +std::int64_t ormrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, std::int64_t ldc); + +template +std::int64_t ormqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, std::int64_t ldc); + +template +std::int64_t ormtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t lda, + std::int64_t ldc); + +template +std::int64_t potrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t potrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::int64_t lda, std::int64_t ldb); + +template +std::int64_t potri_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t sytrf_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t syevd_scratchpad_size(sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda); + +template +std::int64_t sygvd_scratchpad_size(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + std::int64_t ldb); + +template +std::int64_t sytrd_scratchpad_size(sycl::queue& queue, 
oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t trtrs_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t ldb); + +template +std::int64_t ungbr_scratchpad_size(sycl::queue& queue, oneapi::math::generate vect, std::int64_t m, + std::int64_t n, std::int64_t k, std::int64_t lda); + +template +std::int64_t ungqr_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda); + +template +std::int64_t ungtr_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t lda); + +template +std::int64_t unmrq_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, std::int64_t ldc); + +template +std::int64_t unmqr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, std::int64_t ldc); + +template +std::int64_t unmtr_scratchpad_size(sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t lda, + std::int64_t ldc); + +/* Batch queries, first form: scalar dimensions plus stride_* arguments and a single batch_size. */ template +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, std::int64_t batch_size); + +template +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_ipiv, + std::int64_t batch_size); + +template +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_ipiv, + std::int64_t ldb, 
std::int64_t stride_b, + std::int64_t batch_size); + +template +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, std::int64_t batch_size); + +template +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size); + +template +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t stride_a, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size); + +template +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, std::int64_t batch_size); + +template +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, std::int64_t batch_size); + +/* Batch queries, second form: same names overloaded with pointer parameters for the dimensions, plus group_count and group_sizes (presumably one entry per group -- confirm against the API specification). */ template +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + +template +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); + +template +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); + +template +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes); + +template +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, 
std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); + +template +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); + +template +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo* uplo, + std::int64_t* n, std::int64_t* nrhs, std::int64_t* lda, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes); + +template +std::int64_t ungqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, std::int64_t* group_sizes); diff --git a/src/lapack/backends/CMakeLists.txt b/src/lapack/backends/CMakeLists.txt index 44ac43b38..3f2a21cac 100644 --- a/src/lapack/backends/CMakeLists.txt +++ b/src/lapack/backends/CMakeLists.txt @@ -35,3 +35,8 @@ endif() if(ENABLE_ROCSOLVER_BACKEND) add_subdirectory(rocsolver) endif() + +if(ENABLE_ARMPL_BACKEND) + add_subdirectory(armpl) +endif() + diff --git a/src/lapack/backends/armpl/CMakeLists.txt b/src/lapack/backends/armpl/CMakeLists.txt new file mode 100644 index 000000000..704ea9f62 --- /dev/null +++ b/src/lapack/backends/armpl/CMakeLists.txt @@ -0,0 +1,66 @@ +#=============================================================================== +# Copyright 2025 SiPearl +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions +# and limitations under the License. 
+#
+#
+# SPDX-License-Identifier: Apache-2.0
+#===============================================================================
+
+set(LIB_NAME onemath_lapack_armpl)
+set(LIB_OBJ ${LIB_NAME}_obj)
+
+# Locate Arm Performance Libraries (required); ARMPL_INCLUDE / ARMPL_LINK used below are presumably set by this find module -- confirm
+find_package(ARMPL REQUIRED)
+
+add_library(${LIB_NAME})
+add_library(${LIB_OBJ} OBJECT
+  armpl_wrappers.cpp
+  armpl_batch.cpp
+  $<$<BOOL:${BUILD_SHARED_LIBS}>: armpl_wrappers_table_dyn.cpp>  # compiled only when BUILD_SHARED_LIBS is true
+
+)
+
+target_include_directories(${LIB_OBJ}
+  PUBLIC ${ONEMATH_INCLUDE_DIRS}
+  PRIVATE ${PROJECT_SOURCE_DIR}/src/include
+          ${PROJECT_SOURCE_DIR}/src
+          ${ARMPL_INCLUDE}
+)
+
+target_link_libraries(${LIB_OBJ}
+    PUBLIC ONEMATH::SYCL::SYCL ${ARMPL_LINK}
+)
+
+target_compile_features(${LIB_OBJ} PUBLIC cxx_std_14)
+set_target_properties(${LIB_OBJ} PROPERTIES
+  POSITION_INDEPENDENT_CODE ON
+)
+target_link_libraries(${LIB_NAME} PRIVATE ${LIB_OBJ})
+target_include_directories(${LIB_NAME} PUBLIC ${ONEMATH_INCLUDE_DIRS})
+
+# Use the project major version as the library SOVERSION
+set_target_properties(${LIB_NAME} PROPERTIES
+  SOVERSION ${PROJECT_VERSION_MAJOR}
+)
+
+# Append this library's output directory to the build-tree RPATH
+list(APPEND CMAKE_BUILD_RPATH $<TARGET_FILE_DIR:${LIB_NAME}>)
+
+# Export both the object target and the final library through oneMathTargets
+install(TARGETS ${LIB_OBJ} EXPORT oneMathTargets)
+install(TARGETS ${LIB_NAME} EXPORT oneMathTargets
+  RUNTIME DESTINATION bin
+  ARCHIVE DESTINATION lib
+  LIBRARY DESTINATION lib
+)
diff --git a/src/lapack/backends/armpl/armpl_batch.cpp b/src/lapack/backends/armpl/armpl_batch.cpp
new file mode 100644
index 000000000..66eb39c9f
--- /dev/null
+++ b/src/lapack/backends/armpl/armpl_batch.cpp
@@ -0,0 +1,986 @@
+/***************************************************************************
+* Copyright 2025 SiPearl
+* Copyright (C) Codeplay Software Limited
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ +#include "armpl_common.hpp" + +#include "oneapi/math/exceptions.hpp" +#include "oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hpp" + +namespace oneapi { +namespace math { +namespace lapack { +namespace armpl { + +// BATCH BUFFER API + +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "geqrf_batch"); +} +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& tau, + std::int64_t stride_tau, std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "geqrf_batch"); +} +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "geqrf_batch"); +} +void geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& 
scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "geqrf_batch"); +} +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getri_batch"); +} +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getri_batch"); +} +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getri_batch"); +} +void getri_batch(sycl::queue& queue, std::int64_t n, sycl::buffer>& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer>& scratchpad, std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getri_batch"); +} +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, sycl::buffer& b, + std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrs_batch"); +} +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, 
sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrs_batch"); +} +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrs_batch"); +} +void getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, sycl::buffer>& a, std::int64_t lda, + std::int64_t stride_a, sycl::buffer& ipiv, std::int64_t stride_ipiv, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrs_batch"); +} +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrf_batch"); +} +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, std::int64_t stride_a, sycl::buffer& ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrf_batch"); +} +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrf_batch"); +} +void getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, 
std::int64_t stride_a, + sycl::buffer& ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "getrf_batch"); +} +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + throw unimplemented("lapack", "orgqr_batch"); +} +void orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& tau, std::int64_t stride_tau, std::int64_t batch_size, + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { + throw unimplemented("lapack", "orgqr_batch"); +} +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrf_batch"); +} +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrf_batch"); +} +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrf_batch"); +} +void potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrf_batch"); +} +void 
potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrs_batch"); +} +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrs_batch"); +} +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrs_batch"); +} +void potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "potrs_batch"); +} +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "ungqr_batch"); +} +void ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer>& a, std::int64_t lda, std::int64_t stride_a, + sycl::buffer>& tau, std::int64_t stride_tau, + std::int64_t batch_size, sycl::buffer>& 
scratchpad, + std::int64_t scratchpad_size) { + throw unimplemented("lapack", "ungqr_batch"); +} + +// BATCH USM API + +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::complex* tau, + std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, 
double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event geqrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "geqrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, 
std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, float** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, double** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getrf_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, + std::complex** a, std::int64_t* lda, std::int64_t** ipiv, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrf_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, float* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, double* a, std::int64_t lda, + std::int64_t stride_a, 
std::int64_t* ipiv, std::int64_t stride_ipiv, + std::int64_t batch_size, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t n, std::complex* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t* ipiv, + std::int64_t stride_ipiv, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, float** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, double** a, std::int64_t* lda, + std::int64_t** ipiv, std::int64_t group_count, std::int64_t* group_sizes, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getri_batch(sycl::queue& queue, std::int64_t* n, std::complex** a, + std::int64_t* lda, std::int64_t** ipiv, std::int64_t group_count, + std::int64_t* 
group_sizes, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getri_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, float* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t* ipiv, std::int64_t stride_ipiv, double* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::int64_t* ipiv, std::int64_t stride_ipiv, + std::complex* b, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* 
trans, std::int64_t* n, + std::int64_t* nrhs, float** a, std::int64_t* lda, std::int64_t** ipiv, + float** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, double** a, std::int64_t* lda, std::int64_t** ipiv, + double** b, std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event getrs_batch(sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, + std::int64_t* nrhs, std::complex** a, std::int64_t* lda, + std::int64_t** ipiv, std::complex** b, std::int64_t* ldb, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "getrs_batch"); +} +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + float* a, std::int64_t lda, std::int64_t stride_a, float* tau, + std::int64_t stride_tau, std::int64_t batch_size, float* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "orgqr_batch"); +} +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + double* a, 
std::int64_t lda, std::int64_t stride_a, double* tau, + std::int64_t stride_tau, std::int64_t batch_size, double* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "orgqr_batch"); +} +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + float** a, std::int64_t* lda, float** tau, std::int64_t group_count, + std::int64_t* group_sizes, float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "orgqr_batch"); +} +sycl::event orgqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + double** a, std::int64_t* lda, double** tau, std::int64_t group_count, + std::int64_t* group_sizes, double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "orgqr_batch"); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, float* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrf_batch"); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, double* a, + std::int64_t lda, std::int64_t stride_a, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrf_batch"); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrf_batch"); +} +sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + 
std::int64_t batch_size, std::complex* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrf_batch"); +} + +template +inline sycl::event potrf_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, T** a, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrf_batch"); +} + +#define POTRF_BATCH_LAUNCHER_USM(TYPE) \ + sycl::event potrf_batch( \ + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, TYPE** a, \ + std::int64_t* lda, std::int64_t group_count, std::int64_t* group_sizes, TYPE* scratchpad, \ + std::int64_t scratchpad_size, const std::vector& dependencies) { \ + return potrf_batch(queue, uplo, n, a, lda, group_count, group_sizes, scratchpad, \ + scratchpad_size, dependencies); \ + } + +POTRF_BATCH_LAUNCHER_USM(float) +POTRF_BATCH_LAUNCHER_USM(double) +POTRF_BATCH_LAUNCHER_USM(std::complex) +POTRF_BATCH_LAUNCHER_USM(std::complex) + +#undef POTRF_BATCH_LAUNCHER_USM + +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, float* a, std::int64_t lda, std::int64_t stride_a, + float* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + float* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrs_batch"); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, double* a, std::int64_t lda, std::int64_t stride_a, + double* b, std::int64_t ldb, std::int64_t stride_b, std::int64_t batch_size, + double* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrs_batch"); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, 
std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrs_batch"); +} +sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, + std::int64_t nrhs, std::complex* a, std::int64_t lda, + std::int64_t stride_a, std::complex* b, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrs_batch"); +} + +template +inline sycl::event potrs_batch(sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, + std::int64_t* nrhs, T** a, std::int64_t* lda, T** b, + std::int64_t* ldb, std::int64_t group_count, + std::int64_t* group_sizes, T* scratchpad, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "potrs_batch"); +} + +#define POTRS_BATCH_LAUNCHER_USM(TYPE) \ + sycl::event potrs_batch( \ + sycl::queue& queue, oneapi::math::uplo* uplo, std::int64_t* n, std::int64_t* nrhs, \ + TYPE** a, std::int64_t* lda, TYPE** b, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return potrs_batch(queue, uplo, n, nrhs, a, lda, b, ldb, group_count, group_sizes, \ + scratchpad, scratchpad_size, dependencies); \ + } + +POTRS_BATCH_LAUNCHER_USM(float) +POTRS_BATCH_LAUNCHER_USM(double) +POTRS_BATCH_LAUNCHER_USM(std::complex) +POTRS_BATCH_LAUNCHER_USM(std::complex) + +#undef POTRS_BATCH_LAUNCHER_USM + +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, 
std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "ungqr_batch"); +} +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, + std::complex* a, std::int64_t lda, std::int64_t stride_a, + std::complex* tau, std::int64_t stride_tau, std::int64_t batch_size, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "ungqr_batch"); +} +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "ungqr_batch"); +} +sycl::event ungqr_batch(sycl::queue& queue, std::int64_t* m, std::int64_t* n, std::int64_t* k, + std::complex** a, std::int64_t* lda, std::complex** tau, + std::int64_t group_count, std::int64_t* group_sizes, + std::complex* scratchpad, std::int64_t scratchpad_size, + const std::vector& dependencies) { + throw unimplemented("lapack", "ungqr_batch"); +} + +// BATCH SCRATCHPAD API + +template <> +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, std::int64_t batch_size) { + throw unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size) { + throw unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size) { + throw 
unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size) { + throw unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, std::int64_t batch_size) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t n, + std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_ipiv, + std::int64_t batch_size) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_ipiv, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose trans, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_ipiv, std::int64_t ldb, + std::int64_t stride_b, std::int64_t 
batch_size) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t getrs_batch_scratchpad_size>( + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t getrs_batch_scratchpad_size>( + sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t stride_ipiv, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, std::int64_t batch_size) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t stride_a, + std::int64_t stride_tau, std::int64_t batch_size) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t stride_tau, + std::int64_t batch_size) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} + +template <> +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, 
std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size) { + throw unimplemented("lapack", "potrf_batch_scratchpad_size"); +} +template <> +std::int64_t potrf_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, std::int64_t batch_size) { + throw unimplemented("lapack", "potrf_batch_scratchpad_size"); +} +template <> +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t batch_size) { + throw unimplemented("lapack", "potrf_batch_scratchpad_size"); +} +template <> +std::int64_t potrf_batch_scratchpad_size>(sycl::queue& queue, + oneapi::math::uplo uplo, + std::int64_t n, std::int64_t lda, + std::int64_t stride_a, + std::int64_t batch_size) { + throw unimplemented("lapack", "potrf_batch_scratchpad_size"); +} +template <> +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, std::int64_t lda, + std::int64_t stride_a, std::int64_t ldb, + std::int64_t stride_b, std::int64_t batch_size) { + throw unimplemented("lapack", "potrs_batch_scratchpad_size"); +} +template <> +std::int64_t potrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, + std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { + throw unimplemented("lapack", "potrs_batch_scratchpad_size"); +} +template <> +std::int64_t potrs_batch_scratchpad_size>( + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { + throw unimplemented("lapack", "potrs_batch_scratchpad_size"); +} +template <> +std::int64_t potrs_batch_scratchpad_size>( + sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t 
nrhs, + std::int64_t lda, std::int64_t stride_a, std::int64_t ldb, std::int64_t stride_b, + std::int64_t batch_size) { + throw unimplemented("lapack", "potrs_batch_scratchpad_size"); +} +template <> +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_tau, + std::int64_t batch_size) { + throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); +} +template <> +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_tau, + std::int64_t batch_size) { + throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); +} +template <> +std::int64_t ungqr_batch_scratchpad_size>( + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { + throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); +} +template <> +std::int64_t ungqr_batch_scratchpad_size>( + sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t stride_a, std::int64_t stride_tau, std::int64_t batch_size) { + throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); +} +template <> +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t 
group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrf_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getri_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* n, + std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getri_batch_scratchpad_size"); +} +template <> +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t getrs_batch_scratchpad_size(sycl::queue& queue, oneapi::math::transpose* trans, + std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t 
getrs_batch_scratchpad_size>( + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t getrs_batch_scratchpad_size>( + sycl::queue& queue, oneapi::math::transpose* trans, std::int64_t* n, std::int64_t* nrhs, + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, std::int64_t* group_sizes) { + throw unimplemented("lapack", "getrs_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} +template <> +std::int64_t geqrf_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "geqrf_batch_scratchpad_size"); +} +template <> +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); +} +template <> +std::int64_t orgqr_batch_scratchpad_size(sycl::queue& queue, std::int64_t* m, + std::int64_t* 
n, std::int64_t* k, + std::int64_t* lda, std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "orgqr_batch_scratchpad_size"); +} +#define POTRF_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrf_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* lda, \ + std::int64_t group_count, std::int64_t* group_sizes) { \ + throw unimplemented("lapack", "potrf_batch_scratchpad_size"); \ + } + +POTRF_GROUP_LAUNCHER_SCRATCH(float) +POTRF_GROUP_LAUNCHER_SCRATCH(double) +POTRF_GROUP_LAUNCHER_SCRATCH(std::complex) +POTRF_GROUP_LAUNCHER_SCRATCH(std::complex) + +#undef POTRF_GROUP_LAUNCHER_SCRATCH + +#define POTRS_GROUP_LAUNCHER_SCRATCH(TYPE) \ + template <> \ + std::int64_t potrs_batch_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo * uplo, std::int64_t* n, std::int64_t* nrhs, \ + std::int64_t* lda, std::int64_t* ldb, std::int64_t group_count, \ + std::int64_t* group_sizes) { \ + throw unimplemented("lapack", "potrs_batch_scratchpad_size"); \ + } + +POTRS_GROUP_LAUNCHER_SCRATCH(float) +POTRS_GROUP_LAUNCHER_SCRATCH(double) +POTRS_GROUP_LAUNCHER_SCRATCH(std::complex) +POTRS_GROUP_LAUNCHER_SCRATCH(std::complex) + +#undef POTRS_GROUP_LAUNCHER_SCRATCH + +template <> +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); +} +template <> +std::int64_t ungqr_batch_scratchpad_size>(sycl::queue& queue, std::int64_t* m, + std::int64_t* n, std::int64_t* k, + std::int64_t* lda, + std::int64_t group_count, + std::int64_t* group_sizes) { + throw unimplemented("lapack", "ungqr_batch_scratchpad_size"); +} + +} // namespace armpl +} // namespace lapack +} // namespace math +} // namespace oneapi diff --git a/src/lapack/backends/armpl/armpl_common.hpp 
b/src/lapack/backends/armpl/armpl_common.hpp new file mode 100644 index 000000000..0e1d67560 --- /dev/null +++ b/src/lapack/backends/armpl/armpl_common.hpp @@ -0,0 +1,183 @@ +/******************************************************************************* +* Copyright 2025 SiPearl +* Copyright 2020-2021 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#ifndef _ARMPL_COMMON_HPP_ +#define _ARMPL_COMMON_HPP_ + +#define __fp16 _Float16 +#define INTEGER64 1 + +#include +#include + +#include "armpl.h" + +#include "oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hpp" +#include "oneapi/math/types.hpp" + +#define GET_MULTI_PTR template get_multi_ptr().get_raw() + +namespace oneapi { +namespace math { +namespace lapack { +namespace armpl { + +template +static inline auto host_task_internal(H& cgh, F f, int) -> decltype(cgh.host_task(f)) { + return cgh.host_task(f); +} + +template +static inline void host_task_internal(H& cgh, F f, long) { +#ifndef __SYCL_DEVICE_ONLY__ + cgh.template single_task(f); +#endif +} + +template +static inline void host_task(H& cgh, F f) { + (void)host_task_internal(cgh, f, 0); +} + +inline char get_operation(oneapi::math::transpose trn) { + switch (trn) { + case oneapi::math::transpose::nontrans: return 'N'; + case oneapi::math::transpose::trans: return 'T'; + case oneapi::math::transpose::conjtrans: 
return 'C'; + default: throw "Wrong transpose Operation."; + } +} + +inline char get_fill_mode(oneapi::math::uplo ul) { + switch (ul) { + case oneapi::math::uplo::upper: return 'U'; + case oneapi::math::uplo::lower: return 'L'; + default: throw "Wrong fill mode."; + } +} + +inline char get_side_mode(oneapi::math::side lr) { + switch (lr) { + case oneapi::math::side::left: return 'L'; + case oneapi::math::side::right: return 'R'; + default: throw "Wrong side mode."; + } +} + +inline char get_generate(oneapi::math::generate qp) { + switch (qp) { + case oneapi::math::generate::Q: return 'Q'; + case oneapi::math::generate::P: return 'P'; + default: throw "Wrong generate."; + } +} + +inline char get_job(oneapi::math::job jobz) { + switch (jobz) { + case oneapi::math::job::N: return 'N'; + case oneapi::math::job::V: return 'V'; + default: throw "Wrong jobz."; + } +} + +inline char get_jobsvd(oneapi::math::jobsvd job) { + switch (job) { + case oneapi::math::jobsvd::N: return 'N'; + case oneapi::math::jobsvd::A: return 'A'; + case oneapi::math::jobsvd::O: return 'O'; + case oneapi::math::jobsvd::S: return 'S'; + default: throw "Wrong job."; + } +} + +inline char get_diag(oneapi::math::diag diag) { + switch (diag) { + case oneapi::math::diag::N: return 'N'; + case oneapi::math::diag::U: return 'U'; + default: throw "Wrong diag."; + } +} + +/*converting std::complex to cuComplex*/ +/*converting sycl::half to __half*/ +template +struct ArmEquivalentType { + using Type = T; +}; +template <> +struct ArmEquivalentType> { + using Type = armpl_singlecomplex_t; +}; +template <> +struct ArmEquivalentType> { + using Type = armpl_doublecomplex_t; +}; + +template +constexpr bool is_complex = false; +template +constexpr bool is_complex> = true; +template <> +inline constexpr bool is_complex = true; +template <> +inline constexpr bool is_complex = true; + +template +constexpr auto cast_to_int_if_complex(const T& alpha) { + if constexpr (is_complex) { + return 
static_cast((*((T*)&alpha))); + } + else { + return (std::int64_t)alpha; + } +} + +class armpl_lapacke_error : virtual public std::runtime_error { +protected: + // Lapacke errors are already reported by a printf in lapacke_xerbla, so this may be redundant. + inline std::string lapacke_error_message(std::int64_t info) { + if (info == LAPACK_WORK_MEMORY_ERROR) { + return std::string("Not enough memory to allocate work array\n"); + } + else if (info == LAPACK_TRANSPOSE_MEMORY_ERROR) { + return std::string("Not enough memory to transpose matrix\n"); + } + else if (info < 0) { + return std::string("Wrong parameter number " + std::to_string(-info)); + } + else { + return std::string("Runtime error\n"); + } + } + +public: + explicit armpl_lapacke_error(std::string func, std::int64_t result) + : std::runtime_error("Arm Performance Libraries backend: LAPACKE error in " + func + + ": " + std::string(lapacke_error_message(result))) {} + + virtual ~armpl_lapacke_error() throw() {} +}; + +} // namespace armpl +} // namespace lapack +} // namespace math +} // namespace oneapi + +#endif //_ARMPL_COMMON_HPP_ diff --git a/src/lapack/backends/armpl/armpl_wrappers.cpp b/src/lapack/backends/armpl/armpl_wrappers.cpp new file mode 100644 index 000000000..35df8c96e --- /dev/null +++ b/src/lapack/backends/armpl/armpl_wrappers.cpp @@ -0,0 +1,3389 @@ +/*************************************************************************** +* Copyright 2025 SiPearl +* +* Adapted from cusolver backend. +* +* Copyright (C) Codeplay Software Limited +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* For your convenience, a copy of the License has been included in this +* repository. 
+* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +* +**************************************************************************/ +#include "armpl_common.hpp" + +#include "oneapi/math/exceptions.hpp" +#include "oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hpp" + +namespace oneapi { +namespace math { +namespace lapack { +namespace armpl { + +// BUFFER APIs + +template +inline void gebrd(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tauq, sycl::buffer& taup, + sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto d_acc = d.template get_access(cgh); + auto e_acc = e.template get_access(cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + auto tauq_acc = + tauq.template reinterpret().template get_access( + cgh); + auto taup_acc = + taup.template reinterpret().template get_access( + cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, m, n, a_acc.GET_MULTI_PTR, lda, d_acc.GET_MULTI_PTR, + e_acc.GET_MULTI_PTR, tauq_acc.GET_MULTI_PTR, taup_acc.GET_MULTI_PTR, + s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define GEBRD_LAUNCHER(TYPE_A, TYPE_B, ROUTINE) \ + void gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tauq, sycl::buffer& taup, \ + sycl::buffer& scratchpad, std::int64_t 
scratchpad_size) { \ + gebrd(ROUTINE, #ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, scratchpad, \ + scratchpad_size); \ + } + +GEBRD_LAUNCHER(float, float, LAPACKE_sgebrd_work) +GEBRD_LAUNCHER(double, double, LAPACKE_dgebrd_work) +GEBRD_LAUNCHER(std::complex, float, LAPACKE_cgebrd_work) +GEBRD_LAUNCHER(std::complex, double, LAPACKE_zgebrd_work) + +#undef GEBRD_LAUNCHER + +template +inline void gerqf(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define GERQF_LAUNCHER(TYPE, ROUTINE) \ + void gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + gerqf(ROUTINE, #ROUTINE, queue, m, n, a, lda, tau, scratchpad, scratchpad_size); \ + } + +GERQF_LAUNCHER(float, LAPACKE_sgerqf_work) +GERQF_LAUNCHER(double, LAPACKE_dgerqf_work) +GERQF_LAUNCHER(std::complex, LAPACKE_cgerqf_work) +GERQF_LAUNCHER(std::complex, LAPACKE_zgerqf_work) + +#undef GERQF_LAUNCHER + +template +inline void geqrf(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + 
queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define GEQRF_LAUNCHER(TYPE, ROUTINE) \ + void geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + geqrf(ROUTINE, #ROUTINE, queue, m, n, a, lda, tau, scratchpad, scratchpad_size); \ + } + +GEQRF_LAUNCHER(float, LAPACKE_sgeqrf_work) +GEQRF_LAUNCHER(double, LAPACKE_dgeqrf_work) +GEQRF_LAUNCHER(std::complex, LAPACKE_cgeqrf_work) +GEQRF_LAUNCHER(std::complex, LAPACKE_zgeqrf_work) + +#undef GEQRF_LAUNCHER + +template +void getrf(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto ipiv_acc = ipiv.template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, m, n, a_acc.GET_MULTI_PTR, lda, ipiv_acc.GET_MULTI_PTR); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + +#define GETRF_LAUNCHER(TYPE, ROUTINE) \ + void getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + getrf(ROUTINE, #ROUTINE, queue, m, n, a, lda, ipiv); \ + } +} + +GETRF_LAUNCHER(float, LAPACKE_sgetrf_work) +GETRF_LAUNCHER(double, 
LAPACKE_dgetrf_work) +GETRF_LAUNCHER(std::complex, LAPACKE_cgetrf_work) +GETRF_LAUNCHER(std::complex, LAPACKE_zgetrf_work) + +#undef GETRF_LAUNCHER + +template +void getri(Func func, const char* func_name, sycl::queue& queue, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto ipiv_acc = ipiv.template get_access(cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, n, a_acc.GET_MULTI_PTR, lda, + ipiv_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + +#define GETRI_LAUNCHER(TYPE, ROUTINE) \ + void getri(sycl::queue& queue, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + getri(ROUTINE, #ROUTINE, queue, n, a, lda, ipiv, scratchpad, scratchpad_size); \ + } +} + +GETRI_LAUNCHER(float, LAPACKE_sgetri_work) +GETRI_LAUNCHER(double, LAPACKE_dgetri_work) +GETRI_LAUNCHER(std::complex, LAPACKE_cgetri_work) +GETRI_LAUNCHER(std::complex, LAPACKE_zgetri_work) + +template +inline void getrs(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, + sycl::buffer& a, std::int64_t lda, sycl::buffer& ipiv, + sycl::buffer& b, std::int64_t ldb) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = + a.template reinterpret().template get_access( + cgh); + auto ipiv_acc = ipiv.template get_access(cgh); + auto b_acc = + b.template reinterpret().template get_access( + cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, 
get_operation(trans), n, nrhs, a_acc.GET_MULTI_PTR, lda, + ipiv_acc.GET_MULTI_PTR, b_acc.GET_MULTI_PTR, ldb); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define GETRS_LAUNCHER(TYPE, ROUTINE) \ + void getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& ipiv, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + getrs(ROUTINE, #ROUTINE, queue, trans, n, nrhs, a, lda, ipiv, b, ldb); \ + } + +GETRS_LAUNCHER(float, LAPACKE_sgetrs_work) +GETRS_LAUNCHER(double, LAPACKE_dgetrs_work) +GETRS_LAUNCHER(std::complex, LAPACKE_cgetrs_work) +GETRS_LAUNCHER(std::complex, LAPACKE_zgetrs_work) + +#undef GETRS_LAUNCHER + +template +inline void gesvd(Func func, const char* func_name, sycl::queue& queue, oneapi::math::jobsvd jobu, + oneapi::math::jobsvd jobvt, std::int64_t m, std::int64_t n, sycl::buffer& a, + std::int64_t lda, sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto s_acc = s.template get_access(cgh); + auto u_acc = + u.template reinterpret().template get_access( + cgh); + auto vt_acc = + vt.template reinterpret().template get_access( + cgh); + auto scratch_acc = scratchpad.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = 0; + if constexpr (is_complex) { + T_B* rwork = new T_B[5 * std::min(m, n)]; + err = func(LAPACK_COL_MAJOR, get_jobsvd(jobu), get_jobsvd(jobvt), m, n, + a_acc.GET_MULTI_PTR, lda, s_acc.GET_MULTI_PTR, u_acc.GET_MULTI_PTR, ldu, + vt_acc.GET_MULTI_PTR, ldvt, scratch_acc.GET_MULTI_PTR, scratchpad_size, + rwork); + delete[] rwork; + } + else { + err = 
func(LAPACK_COL_MAJOR, get_jobsvd(jobu), get_jobsvd(jobvt), m, n, + a_acc.GET_MULTI_PTR, lda, s_acc.GET_MULTI_PTR, u_acc.GET_MULTI_PTR, ldu, + vt_acc.GET_MULTI_PTR, ldvt, scratch_acc.GET_MULTI_PTR, scratchpad_size); + } + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define GESVD_LAUNCHER(TYPE_A, TYPE_B, ROUTINE) \ + void gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& s, sycl::buffer& u, std::int64_t ldu, \ + sycl::buffer& vt, std::int64_t ldvt, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + gesvd(ROUTINE, #ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, \ + scratchpad, scratchpad_size); \ + } + +GESVD_LAUNCHER(float, float, LAPACKE_sgesvd_work) +GESVD_LAUNCHER(double, double, LAPACKE_dgesvd_work) +GESVD_LAUNCHER(std::complex, float, LAPACKE_cgesvd_work) +GESVD_LAUNCHER(std::complex, double, LAPACKE_zgesvd_work) + +#undef GESVD_LAUNCHER +template +inline void heevd(Func func, const char* func_name, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto w_acc = w.template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_job(jobz), get_fill_mode(uplo), n, + a_acc.GET_MULTI_PTR, lda, w_acc.GET_MULTI_PTR); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define HEEVD_LAUNCHER(TYPE_A, TYPE_B, ROUTINE) \ + void heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + heevd(ROUTINE, #ROUTINE, 
queue, jobz, uplo, n, a, lda, w); \ + } + +HEEVD_LAUNCHER(std::complex, float, LAPACKE_cheevd) +HEEVD_LAUNCHER(std::complex, double, LAPACKE_zheevd) + +#undef HEEVD_LAUNCHER + +template +inline void hegvd(Func func, const char* func_name, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto b_acc = b.template reinterpret() + .template get_access(cgh); + auto w_acc = w.template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, itype, get_job(jobz), get_fill_mode(uplo), n, + a_acc.GET_MULTI_PTR, lda, b_acc.GET_MULTI_PTR, ldb, w_acc.GET_MULTI_PTR); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define HEGVD_LAUNCHER(TYPE_A, TYPE_B, ROUTINE) \ + void hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + hegvd(ROUTINE, #ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w); \ + } + +HEGVD_LAUNCHER(std::complex, float, LAPACKE_chegvd) +HEGVD_LAUNCHER(std::complex, double, LAPACKE_zhegvd) + +#undef HEGVD_LAUNCHER + +template +inline void hetrd(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto d_acc = d.template 
get_access(cgh); + auto e_acc = e.template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, + lda, d_acc.GET_MULTI_PTR, e_acc.GET_MULTI_PTR, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define HETRD_LAUNCHER(TYPE_A, TYPE_B, ROUTINE) \ + void hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& d, \ + sycl::buffer& e, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + hetrd(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); \ + } + +HETRD_LAUNCHER(std::complex, float, LAPACKE_chetrd_work) +HETRD_LAUNCHER(std::complex, double, LAPACKE_zhetrd_work) + +#undef HETRD_LAUNCHER + +template +inline void hetrf(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto ipiv_acc = ipiv.template get_access(cgh); + + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, lda, + ipiv_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define HETRF_LAUNCHER(TYPE_A, ROUTINE) \ + void hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, 
sycl::buffer& ipiv, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + hetrf(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); \ + } + +HETRF_LAUNCHER(std::complex, LAPACKE_chetrf_work) +HETRF_LAUNCHER(std::complex, LAPACKE_zhetrf_work) + +#undef HETRF_LAUNCHER + +template +inline void orgbr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_generate(vec), m, n, k, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define ORGBR_LAUNCHER(TYPE, ROUTINE) \ + void orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + orgbr(ROUTINE, #ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); \ + } + +ORGBR_LAUNCHER(float, LAPACKE_sorgbr_work) +ORGBR_LAUNCHER(double, LAPACKE_dorgbr_work) + +#undef ORGBR_LAUNCHER + +template +inline void orgqr(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = 
a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, k, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define ORGQR_LAUNCHER(TYPE, ROUTINE) \ + void orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + orgqr(ROUTINE, #ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); \ + } + +ORGQR_LAUNCHER(float, LAPACKE_sorgqr_work) +ORGQR_LAUNCHER(double, LAPACKE_dorgqr_work) + +#undef ORGQR_LAUNCHER + +template +inline void orgtr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define ORGTR_LAUNCHER(TYPE, ROUTINE) \ + void orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + orgtr(ROUTINE, #ROUTINE, queue, uplo, n, a, 
lda, tau, scratchpad, scratchpad_size); \ + } + +ORGTR_LAUNCHER(float, LAPACKE_sorgtr_work) +ORGTR_LAUNCHER(double, LAPACKE_dorgtr_work) + +#undef ORGTR_LAUNCHER + +template +inline void ormtr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = tau.template reinterpret() + .template get_access(cgh); + auto c_acc = c.template reinterpret() + .template get_access(cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_fill_mode(uplo), + get_operation(trans), m, n, a_acc.GET_MULTI_PTR, lda, tau_acc.GET_MULTI_PTR, + c_acc.GET_MULTI_PTR, ldc, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define ORMTR_LAUNCHER(TYPE, ROUTINE) \ + void ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + ormtr(ROUTINE, #ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, \ + scratchpad_size); \ + } + +ORMTR_LAUNCHER(float, LAPACKE_sormtr_work) +ORMTR_LAUNCHER(double, LAPACKE_dormtr_work) + +#undef ORMTR_LAUNCHER + +template +inline void ormrq(Func func, const char* func_name, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, 
std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = tau.template reinterpret() + .template get_access(cgh); + auto c_acc = c.template reinterpret() + .template get_access(cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_acc.GET_MULTI_PTR, lda, tau_acc.GET_MULTI_PTR, + c_acc.GET_MULTI_PTR, ldc, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define ORMRQ_LAUNCHER(TYPE, ROUTINE) \ + void ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + ormrq(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, \ + scratchpad_size); \ + } + +ORMRQ_LAUNCHER(float, LAPACKE_sormrq_work) +ORMRQ_LAUNCHER(double, LAPACKE_dormrq_work) + +#undef ORMRQ_LAUNCHER + +template +inline void ormqr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = + a.template reinterpret().template get_access( + cgh); + auto tau_acc = + tau.template reinterpret().template get_access( 
+ cgh); + auto c_acc = c.template reinterpret() + .template get_access(cgh); + + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_acc.GET_MULTI_PTR, lda, tau_acc.GET_MULTI_PTR, + c_acc.GET_MULTI_PTR, ldc, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define ORMQR_LAUNCHER(TYPE, ROUTINE) \ + void ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + ormqr(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, \ + scratchpad_size); \ + } + +ORMQR_LAUNCHER(float, LAPACKE_sormqr_work) +ORMQR_LAUNCHER(double, LAPACKE_dormqr_work) + +#undef ORMQR_LAUNCHER +template +inline void potrf(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, lda); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define POTRF_LAUNCHER(TYPE, ROUTINE) \ + void potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + potrf(ROUTINE, #ROUTINE, queue, uplo, n, a, lda); \ + } + +POTRF_LAUNCHER(float, LAPACKE_spotrf_work) +POTRF_LAUNCHER(double, LAPACKE_dpotrf_work) +POTRF_LAUNCHER(std::complex, LAPACKE_cpotrf_work) 
+POTRF_LAUNCHER(std::complex, LAPACKE_zpotrf_work) + +#undef POTRF_LAUNCHER + +template +inline void potri(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, lda); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define POTRI_LAUNCHER(TYPE, ROUTINE) \ + void potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + potri(ROUTINE, #ROUTINE, queue, uplo, n, a, lda); \ + } + +POTRI_LAUNCHER(float, LAPACKE_spotri_work) +POTRI_LAUNCHER(double, LAPACKE_dpotri_work) +POTRI_LAUNCHER(std::complex, LAPACKE_cpotri_work) +POTRI_LAUNCHER(std::complex, LAPACKE_zpotri_work) + +#undef POTRI_LAUNCHER + +template +inline void potrs(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, + sycl::buffer& b, std::int64_t ldb) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = + a.template reinterpret().template get_access( + cgh); + auto b_acc = b.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nrhs, + a_acc.GET_MULTI_PTR, lda, b_acc.GET_MULTI_PTR, ldb); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define POTRS_LAUNCHER(TYPE, ROUTINE) \ + void potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, std::int64_t nrhs, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, 
std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + potrs(ROUTINE, #ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb); \ + } + +POTRS_LAUNCHER(float, LAPACKE_spotrs_work) +POTRS_LAUNCHER(double, LAPACKE_dpotrs_work) +POTRS_LAUNCHER(std::complex, LAPACKE_cpotrs_work) +POTRS_LAUNCHER(std::complex, LAPACKE_zpotrs_work) + +#undef POTRS_LAUNCHER + +template +inline void syevd(Func func, const char* func_name, sycl::queue& queue, oneapi::math::job jobz, + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& w) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto w_acc = + w.template reinterpret().template get_access( + cgh); + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_job(jobz), get_fill_mode(uplo), n, + a_acc.GET_MULTI_PTR, lda, w_acc.GET_MULTI_PTR); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define SYEVD_LAUNCHER(TYPE, ROUTINE) \ + void syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + syevd(ROUTINE, #ROUTINE, queue, jobz, uplo, n, a, lda, w); \ + } + +SYEVD_LAUNCHER(float, LAPACKE_ssyevd) +SYEVD_LAUNCHER(double, LAPACKE_dsyevd) + +#undef SYEVD_LAUNCHER + +template +inline void sygvd(Func func, const char* func_name, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, + sycl::buffer& a, std::int64_t lda, sycl::buffer& b, std::int64_t ldb, + sycl::buffer& w) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto b_acc = b.template reinterpret() + .template get_access(cgh); + auto 
w_acc = + w.template reinterpret().template get_access( + cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, itype, get_job(jobz), get_fill_mode(uplo), n, + a_acc.GET_MULTI_PTR, lda, b_acc.GET_MULTI_PTR, ldb, w_acc.GET_MULTI_PTR); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +/* Public buffer overloads of sygvd: scratchpad and scratchpad_size are part of the signature but are not forwarded to the templated helper. */ +#define SYGVD_LAUNCHER(TYPE, ROUTINE) \ + void sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, std::int64_t lda, \ + sycl::buffer& b, std::int64_t ldb, sycl::buffer& w, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + sygvd(ROUTINE, #ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w); \ + } + +SYGVD_LAUNCHER(float, LAPACKE_ssygvd) +SYGVD_LAUNCHER(double, LAPACKE_dsygvd) + +#undef SYGVD_LAUNCHER + +template +inline void sytrd(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& d, + sycl::buffer& e, sycl::buffer& tau, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto d_acc = + d.template reinterpret().template get_access( + cgh); + auto e_acc = + e.template reinterpret().template get_access( + cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, + lda, d_acc.GET_MULTI_PTR, e_acc.GET_MULTI_PTR, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define SYTRD_LAUNCHER(TYPE, ROUTINE) \ + void sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, 
sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& d, sycl::buffer& e, \ + sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + sytrd(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, scratchpad_size); \ + } + +SYTRD_LAUNCHER(float, LAPACKE_ssytrd_work) +SYTRD_LAUNCHER(double, LAPACKE_dsytrd_work) + +#undef SYTRD_LAUNCHER + +template +inline void sytrf(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, + sycl::buffer& ipiv, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto ipiv_acc = ipiv.template get_access(cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, lda, + ipiv_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define SYTRF_LAUNCHER(TYPE, ROUTINE) \ + void sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& ipiv, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + sytrf(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size); \ + } + +SYTRF_LAUNCHER(float, LAPACKE_ssytrf_work) +SYTRF_LAUNCHER(double, LAPACKE_dsytrf_work) +SYTRF_LAUNCHER(std::complex, LAPACKE_csytrf_work) +SYTRF_LAUNCHER(std::complex, LAPACKE_zsytrf_work) + +#undef SYTRF_LAUNCHER + +template +inline void trtrs(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + oneapi::math::transpose trans, oneapi::math::diag diag, std::int64_t n, + std::int64_t nrhs, sycl::buffer& a, std::int64_t lda, sycl::buffer& b, + std::int64_t ldb) { + 
using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto b_acc = + b.template reinterpret().template get_access( + cgh); + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), get_operation(trans), get_diag(diag), n, + nrhs, a_acc.GET_MULTI_PTR, lda, b_acc.GET_MULTI_PTR, ldb); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define TRTRS_LAUNCHER(TYPE, ROUTINE) \ + void trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, \ + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& b, std::int64_t ldb, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + trtrs(ROUTINE, #ROUTINE, queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb); \ + } + +TRTRS_LAUNCHER(float, LAPACKE_strtrs) +TRTRS_LAUNCHER(double, LAPACKE_dtrtrs) +TRTRS_LAUNCHER(std::complex, LAPACKE_ctrtrs) +TRTRS_LAUNCHER(std::complex, LAPACKE_ztrtrs) + +#undef TRTRS_LAUNCHER + +template +inline void ungbr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::generate vec, + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, + std::int64_t lda, sycl::buffer& tau, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_generate(vec), m, n, k, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define 
UNGBR_LAUNCHER(TYPE, ROUTINE) \ + void ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, std::int64_t n, \ + std::int64_t k, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + ungbr(ROUTINE, #ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, scratchpad_size); \ + } + +UNGBR_LAUNCHER(std::complex, LAPACKE_cungbr_work) +UNGBR_LAUNCHER(std::complex, LAPACKE_zungbr_work) + +#undef UNGBR_LAUNCHER + +template +inline void ungqr(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, sycl::buffer& a, std::int64_t lda, + sycl::buffer& tau, sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, k, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define UNGQR_LAUNCHER(TYPE, ROUTINE) \ + void ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + ungqr(ROUTINE, #ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size); \ + } + +UNGQR_LAUNCHER(std::complex, LAPACKE_cungqr_work) +UNGQR_LAUNCHER(std::complex, LAPACKE_zungqr_work) + +#undef UNGQR_LAUNCHER + +template +inline void ungtr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::uplo uplo, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& s, std::int64_t scratchpad_size) { + using 
ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_acc.GET_MULTI_PTR, lda, + tau_acc.GET_MULTI_PTR, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define UNGTR_LAUNCHER(TYPE, ROUTINE) \ + void ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + ungtr(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size); \ + } + +UNGTR_LAUNCHER(std::complex, LAPACKE_cungtr_work) +UNGTR_LAUNCHER(std::complex, LAPACKE_zungtr_work) + +#undef UNGTR_LAUNCHER + +template +inline void unmrq(Func func, const char* func_name, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto c_acc = c.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_acc.GET_MULTI_PTR, lda, tau_acc.GET_MULTI_PTR, + c_acc.GET_MULTI_PTR, ldc, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) 
{ + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define UNMRQ_LAUNCHER(TYPE, ROUTINE) \ + void unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, std::int64_t scratchpad_size) { \ + unmrq(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, \ + scratchpad_size); \ + } + +UNMRQ_LAUNCHER(std::complex, LAPACKE_cunmrq_work) +UNMRQ_LAUNCHER(std::complex, LAPACKE_zunmrq_work) + +#undef UNMRQ_LAUNCHER + +template +inline void unmqr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::side side, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, std::int64_t k, + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, + std::int64_t ldc, sycl::buffer& s, std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto c_acc = c.template reinterpret() + .template get_access(cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_acc.GET_MULTI_PTR, lda, tau_acc.GET_MULTI_PTR, + c_acc.GET_MULTI_PTR, ldc, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define UNMQR_LAUNCHER(TYPE, ROUTINE) \ + void unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, sycl::buffer& a, \ + std::int64_t lda, sycl::buffer& tau, sycl::buffer& c, std::int64_t ldc, \ + sycl::buffer& scratchpad, 
std::int64_t scratchpad_size) { \ + unmqr(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, scratchpad, \ + scratchpad_size); \ + } + +UNMQR_LAUNCHER(std::complex, LAPACKE_cunmqr_work) +UNMQR_LAUNCHER(std::complex, LAPACKE_zunmqr_work) + +#undef UNMQR_LAUNCHER + +template +inline void unmtr(Func func, const char* func_name, sycl::queue& queue, oneapi::math::side side, + oneapi::math::uplo uplo, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, + sycl::buffer& c, std::int64_t ldc, sycl::buffer& s, + std::int64_t scratchpad_size) { + using ArmDataType = typename ArmEquivalentType::Type; + queue.submit([&](sycl::handler& cgh) { + auto a_acc = a.template reinterpret() + .template get_access(cgh); + auto tau_acc = + tau.template reinterpret().template get_access( + cgh); + auto c_acc = c.template reinterpret() + .template get_access(cgh); + auto s_acc = s.template reinterpret() + .template get_access(cgh); + + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_fill_mode(uplo), + get_operation(trans), m, n, a_acc.GET_MULTI_PTR, lda, tau_acc.GET_MULTI_PTR, + c_acc.GET_MULTI_PTR, ldc, s_acc.GET_MULTI_PTR, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); +} + +#define UNMTR_LAUNCHER(TYPE, ROUTINE) \ + void unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, \ + sycl::buffer& a, std::int64_t lda, sycl::buffer& tau, \ + sycl::buffer& c, std::int64_t ldc, sycl::buffer& scratchpad, \ + std::int64_t scratchpad_size) { \ + unmtr(ROUTINE, #ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, scratchpad, \ + scratchpad_size); \ + } + +UNMTR_LAUNCHER(std::complex, LAPACKE_cunmtr_work) +UNMTR_LAUNCHER(std::complex, LAPACKE_zunmtr_work) + +#undef UNMTR_LAUNCHER + +// USM APIs + +template +inline sycl::event 
gebrd(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, T_A* a, std::int64_t lda, T_B* d, T_B* e, T_A* tauq, + T_A* taup, T_A* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tauq_ = reinterpret_cast(tauq); + auto taup_ = reinterpret_cast(taup); + auto s_ = reinterpret_cast(s); + std::int64_t err = + func(LAPACK_COL_MAJOR, m, n, a_, lda, d_, e_, tauq_, taup_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define GEBRD_LAUNCHER_USM(TYPE_A, TYPE_B, ROUTINE) \ + sycl::event gebrd(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tauq, TYPE_A* taup, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return gebrd(ROUTINE, #ROUTINE, queue, m, n, a, lda, d, e, tauq, taup, scratchpad, \ + scratchpad_size, dependencies); \ + } + +GEBRD_LAUNCHER_USM(float, float, LAPACKE_sgebrd_work) +GEBRD_LAUNCHER_USM(double, double, LAPACKE_dgebrd_work) +GEBRD_LAUNCHER_USM(std::complex, float, LAPACKE_cgebrd_work) +GEBRD_LAUNCHER_USM(std::complex, double, LAPACKE_zgebrd_work) + +#undef GEBRD_LAUNCHER_USM + +template +inline sycl::event gerqf(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, T* tau, T* s, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + 
for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, a_, lda, tau_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define GERQF_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event gerqf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return gerqf(ROUTINE, #ROUTINE, queue, m, n, a, lda, tau, scratchpad, scratchpad_size, \ + dependencies); \ + } + +GERQF_LAUNCHER_USM(float, LAPACKE_sgerqf_work) +GERQF_LAUNCHER_USM(double, LAPACKE_dgerqf_work) +GERQF_LAUNCHER_USM(std::complex, LAPACKE_cgerqf_work) +GERQF_LAUNCHER_USM(std::complex, LAPACKE_zgerqf_work) + +#undef GERQF_LAUNCHER_USM + +template +inline sycl::event geqrf(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, T* tau, T* s, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, a_, lda, tau_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define GEQRF_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event geqrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const 
std::vector& dependencies) { \ + return geqrf(ROUTINE, #ROUTINE, queue, m, n, a, lda, tau, scratchpad, scratchpad_size, \ + dependencies); \ + } + +GEQRF_LAUNCHER_USM(float, LAPACKE_sgeqrf_work) +GEQRF_LAUNCHER_USM(double, LAPACKE_dgeqrf_work) +GEQRF_LAUNCHER_USM(std::complex, LAPACKE_cgeqrf_work) +GEQRF_LAUNCHER_USM(std::complex, LAPACKE_zgeqrf_work) + +#undef GEQRF_LAUNCHER_USM + +template +inline sycl::event getrf(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, T* a, std::int64_t lda, std::int64_t* ipiv, T* s, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, a_, lda, ipiv_); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + + return done; +} + +#define GETRF_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event getrf(sycl::queue& queue, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return getrf(ROUTINE, #ROUTINE, queue, m, n, a, lda, ipiv, scratchpad, scratchpad_size, \ + dependencies); \ + } + +GETRF_LAUNCHER_USM(float, LAPACKE_sgetrf_work) +GETRF_LAUNCHER_USM(double, LAPACKE_dgetrf_work) +GETRF_LAUNCHER_USM(std::complex, LAPACKE_cgetrf_work) +GETRF_LAUNCHER_USM(std::complex, LAPACKE_zgetrf_work) + +#undef GETRF_LAUNCHER_USM + +template +inline sycl::event getri(Func func, const char* func_name, sycl::queue& queue, std::int64_t n, T* a, + std::int64_t lda, std::int64_t* ipiv, T* s, std::int64_t scratchpad_size, + const std::vector& 
dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, n, a_, lda, ipiv_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + + return done; +} + +#define GETRI_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event getri(sycl::queue& queue, std::int64_t n, TYPE* a, std::int64_t lda, \ + std::int64_t* ipiv, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return getri(ROUTINE, #ROUTINE, queue, n, a, lda, ipiv, scratchpad, scratchpad_size, \ + dependencies); \ + } + +GETRI_LAUNCHER_USM(float, LAPACKE_sgetri_work) +GETRI_LAUNCHER_USM(double, LAPACKE_dgetri_work) +GETRI_LAUNCHER_USM(std::complex, LAPACKE_cgetri_work) +GETRI_LAUNCHER_USM(std::complex, LAPACKE_zgetri_work) + +#undef GETRI_LAUNCHER_USM + +template +inline sycl::event getrs(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::transpose trans, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, std::int64_t* ipiv, T* b, std::int64_t ldb, T* s, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv); + auto b_ = reinterpret_cast(b); + + std::int64_t err = + func(LAPACK_COL_MAJOR, get_operation(trans), n, nrhs, a_, lda, ipiv_, b_, ldb); + if (err != 0) { + throw armpl_lapacke_error(func_name, 
err); + } + }); + }); + + return done; +} + +#define GETRS_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event getrs(sycl::queue& queue, oneapi::math::transpose trans, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, std::int64_t* ipiv, TYPE* b, \ + std::int64_t ldb, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return getrs(ROUTINE, #ROUTINE, queue, trans, n, nrhs, a, lda, ipiv, b, ldb, scratchpad, \ + scratchpad_size, dependencies); \ + } + +GETRS_LAUNCHER_USM(float, LAPACKE_sgetrs_work) +GETRS_LAUNCHER_USM(double, LAPACKE_dgetrs_work) +GETRS_LAUNCHER_USM(std::complex, LAPACKE_cgetrs_work) +GETRS_LAUNCHER_USM(std::complex, LAPACKE_zgetrs_work) + +#undef GETRS_LAUNCHER_USM + +template +inline sycl::event gesvd(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, std::int64_t m, + std::int64_t n, T_A* a, std::int64_t lda, T_B* s, T_A* u, std::int64_t ldu, + T_A* vt, std::int64_t ldvt, T_A* scratch, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto s_ = reinterpret_cast(s); + auto u_ = reinterpret_cast(u); + auto vt_ = reinterpret_cast(vt); + auto scratch_ = reinterpret_cast(scratch); + std::int64_t err = 0; + if constexpr (is_complex) { + T_B* rwork = new T_B[5 * std::min(m, n)]; + err = func(LAPACK_COL_MAJOR, get_jobsvd(jobu), get_jobsvd(jobvt), m, n, a_, lda, s_, + u_, ldu, vt_, ldvt, scratch_, scratchpad_size, rwork); + delete[] rwork; + } + else { + err = func(LAPACK_COL_MAJOR, get_jobsvd(jobu), get_jobsvd(jobvt), m, n, a_, lda, s_, + u_, ldu, vt_, ldvt, scratch_, scratchpad_size); + } + if (err != 0) { + throw 
armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define GESVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROUTINE) \ + sycl::event gesvd(sycl::queue& queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* s, \ + TYPE_A* u, std::int64_t ldu, TYPE_A* vt, std::int64_t ldvt, \ + TYPE_A* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return gesvd(ROUTINE, #ROUTINE, queue, jobu, jobvt, m, n, a, lda, s, u, ldu, vt, ldvt, \ + scratchpad, scratchpad_size, dependencies); \ + } + +GESVD_LAUNCHER_USM(float, float, LAPACKE_sgesvd_work) +GESVD_LAUNCHER_USM(double, double, LAPACKE_dgesvd_work) +GESVD_LAUNCHER_USM(std::complex, float, LAPACKE_cgesvd_work) +GESVD_LAUNCHER_USM(std::complex, double, LAPACKE_zgesvd_work) + +#undef GESVD_LAUNCHER_USM + +template +inline sycl::event heevd(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_B*& w, const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + std::int64_t err = + func(LAPACK_COL_MAJOR, get_job(jobz), get_fill_mode(uplo), n, a_, lda, w_); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define HEEVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROUTINE) \ + sycl::event heevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, TYPE_A* a, std::int64_t lda, TYPE_B* w, TYPE_A* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return heevd(ROUTINE, #ROUTINE, queue, jobz, uplo, n, a, lda, w, 
dependencies); \ + } + +HEEVD_LAUNCHER_USM(std::complex, float, LAPACKE_cheevd) +HEEVD_LAUNCHER_USM(std::complex, double, LAPACKE_zheevd) + +#undef HEEVD_LAUNCHER_USM + +template +inline sycl::event hegvd(Func func, const char* func_name, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T_A*& a, + std::int64_t lda, T_A*& b, std::int64_t ldb, T_B*& w, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + std::int64_t err = func(LAPACK_COL_MAJOR, itype, get_job(jobz), get_fill_mode(uplo), n, + a_, lda, b_, ldb, w_); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define HEGVD_LAUNCHER_USM(TYPE_A, TYPE_B, ROUTINE) \ + sycl::event hegvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, std::int64_t lda, \ + TYPE_A* b, std::int64_t ldb, TYPE_B* w, TYPE_A* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return hegvd(ROUTINE, #ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \ + dependencies); \ + } + +HEGVD_LAUNCHER_USM(std::complex, float, LAPACKE_chegvd) +HEGVD_LAUNCHER_USM(std::complex, double, LAPACKE_zhegvd) + +#undef HEGVD_LAUNCHER_USM + +template +inline sycl::event hetrd(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T_A* a, std::int64_t lda, T_B* d, + T_B* e, T_A* tau, T_A* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) 
{ + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda, d_, e_, tau_, + s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define HETRD_LAUNCHER_USM(TYPE_A, TYPE_B, ROUTINE) \ + sycl::event hetrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE_A* a, \ + std::int64_t lda, TYPE_B* d, TYPE_B* e, TYPE_A* tau, TYPE_A* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return hetrd(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, \ + scratchpad_size, dependencies); \ + } + +HETRD_LAUNCHER_USM(std::complex, float, LAPACKE_chetrd_work) +HETRD_LAUNCHER_USM(std::complex, double, LAPACKE_zhetrd_work) + +#undef HETRD_LAUNCHER_USM + +template +inline sycl::event hetrf(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + std::int64_t* ipiv, T* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv); + auto s_ = reinterpret_cast(s); + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda, ipiv_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define HETRF_LAUNCHER_USM(TYPE, ROUTINE) \ + 
sycl::event hetrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return hetrf(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, \ + dependencies); \ + } + +HETRF_LAUNCHER_USM(std::complex, LAPACKE_chetrf_work) +HETRF_LAUNCHER_USM(std::complex, LAPACKE_zhetrf_work) + +#undef HETRF_LAUNCHER_USM + +template +inline sycl::event orgbr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, + T* a, std::int64_t lda, T* tau, T* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + std::int64_t err = func(LAPACK_COL_MAJOR, get_generate(vec), m, n, k, a_, lda, tau_, s_, + scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define ORGBR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event orgbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return orgbr(ROUTINE, #ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ + scratchpad_size, dependencies); \ + } + +ORGBR_LAUNCHER_USM(float, LAPACKE_sorgbr_work) +ORGBR_LAUNCHER_USM(double, LAPACKE_dorgbr_work) + +#undef ORGBR_LAUNCHER_USM + +template +inline sycl::event orgqr(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + 
std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* s, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, k, a_, lda, tau_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define ORGQR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event orgqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return orgqr(ROUTINE, #ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, \ + dependencies); \ + } + +ORGQR_LAUNCHER_USM(float, LAPACKE_sorgqr_work) +ORGQR_LAUNCHER_USM(double, LAPACKE_dorgqr_work) + +#undef ORGQR_LAUNCHER_USM + +template +inline sycl::event orgtr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda, tau_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + 
return done; +} + +#define ORGTR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event orgtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return orgtr(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, \ + dependencies); \ + } + +ORGTR_LAUNCHER_USM(float, LAPACKE_sorgtr_work) +ORGTR_LAUNCHER_USM(double, LAPACKE_dorgtr_work) + +#undef ORGTR_LAUNCHER_USM + +template +inline sycl::event ormtr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* s, + std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_fill_mode(uplo), + get_operation(trans), m, n, a_, lda, tau_, c_, ldc, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define ORMTR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event ormtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return ormtr(ROUTINE, #ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, \ + scratchpad, 
scratchpad_size, dependencies); \ + } + +ORMTR_LAUNCHER_USM(float, LAPACKE_sormtr_work) +ORMTR_LAUNCHER_USM(double, LAPACKE_dormtr_work) + +#undef ORMTR_LAUNCHER_USM + +template +inline sycl::event ormrq(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_, lda, tau_, c_, ldc, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define ORMRQ_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event ormrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return ormrq(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, \ + scratchpad, scratchpad_size, dependencies); \ + } + +ORMRQ_LAUNCHER_USM(float, LAPACKE_sormrq_work) +ORMRQ_LAUNCHER_USM(double, LAPACKE_dormrq_work) + +#undef ORMRQ_LAUNCHER_USM + +template +inline sycl::event ormqr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + 
std::int64_t ldc, T* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_, lda, tau_, c_, ldc, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define ORMQR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event ormqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return ormqr(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, \ + scratchpad, scratchpad_size, dependencies); \ + } + +ORMQR_LAUNCHER_USM(float, LAPACKE_sormqr_work) +ORMQR_LAUNCHER_USM(double, LAPACKE_dormqr_work) + +#undef ORMQR_LAUNCHER_USM + +template +inline sycl::event potrf(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + 
}); + return done; +} + +#define POTRF_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event potrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return potrf(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, dependencies); \ + } + +POTRF_LAUNCHER_USM(float, LAPACKE_spotrf_work) +POTRF_LAUNCHER_USM(double, LAPACKE_dpotrf_work) +POTRF_LAUNCHER_USM(std::complex, LAPACKE_cpotrf_work) +POTRF_LAUNCHER_USM(std::complex, LAPACKE_zpotrf_work) + +#undef POTRF_LAUNCHER_USM + +template +inline sycl::event potri(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define POTRI_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event potri(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return potri(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, dependencies); \ + } + +POTRI_LAUNCHER_USM(float, LAPACKE_spotri_work) +POTRI_LAUNCHER_USM(double, LAPACKE_dpotri_work) +POTRI_LAUNCHER_USM(std::complex, LAPACKE_cpotri_work) +POTRI_LAUNCHER_USM(std::complex, LAPACKE_zpotri_work) + +#undef POTRI_LAUNCHER_USM + +// LAPACKE_?potrs_work does not use scratchpad memory +template +inline sycl::event potrs(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, 
std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, T* b, std::int64_t ldb, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nrhs, a_, lda, b_, ldb); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define POTRS_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event potrs(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, \ + std::int64_t nrhs, TYPE* a, std::int64_t lda, TYPE* b, std::int64_t ldb, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return potrs(ROUTINE, #ROUTINE, queue, uplo, n, nrhs, a, lda, b, ldb, dependencies); \ + } + +POTRS_LAUNCHER_USM(float, LAPACKE_spotrs_work) +POTRS_LAUNCHER_USM(double, LAPACKE_dpotrs_work) +POTRS_LAUNCHER_USM(std::complex, LAPACKE_cpotrs_work) +POTRS_LAUNCHER_USM(std::complex, LAPACKE_zpotrs_work) + +#undef POTRS_LAUNCHER_USM + +template +inline sycl::event syevd(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* w, const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto w_ = reinterpret_cast(w); + std::int64_t err = + func(LAPACK_COL_MAJOR, get_job(jobz), get_fill_mode(uplo), n, a_, lda, w_); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + 
}); + return done; +} + +#define SYEVD_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event syevd(sycl::queue& queue, oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, TYPE* a, std::int64_t lda, TYPE* w, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return syevd(ROUTINE, #ROUTINE, queue, jobz, uplo, n, a, lda, w, dependencies); \ + } + +SYEVD_LAUNCHER_USM(float, LAPACKE_ssyevd) +SYEVD_LAUNCHER_USM(double, LAPACKE_dsyevd) + +#undef SYEVD_LAUNCHER_USM + +template +inline sycl::event sygvd(Func func, const char* func_name, sycl::queue& queue, std::int64_t itype, + oneapi::math::job jobz, oneapi::math::uplo uplo, std::int64_t n, T* a, + std::int64_t lda, T* b, std::int64_t ldb, T* w, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + auto w_ = reinterpret_cast(w); + std::int64_t err = func(LAPACK_COL_MAJOR, itype, get_job(jobz), get_fill_mode(uplo), n, + a_, lda, b_, ldb, w_); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define SYGVD_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event sygvd(sycl::queue& queue, std::int64_t itype, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, TYPE* a, std::int64_t lda, TYPE* b, \ + std::int64_t ldb, TYPE* w, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return sygvd(ROUTINE, #ROUTINE, queue, itype, jobz, uplo, n, a, lda, b, ldb, w, \ + dependencies); \ + } + +SYGVD_LAUNCHER_USM(float, LAPACKE_ssygvd) +SYGVD_LAUNCHER_USM(double, LAPACKE_dsygvd) + +#undef SYGVD_LAUNCHER_USM + +template +inline sycl::event sytrd(Func func, const char* func_name, sycl::queue& 
queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* d, + T* e, T* tau, T* s, std::int64_t scratchpad_size, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto d_ = reinterpret_cast(d); + auto e_ = reinterpret_cast(e); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda, d_, e_, tau_, + s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define SYTRD_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event sytrd(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* d, TYPE* e, TYPE* tau, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return sytrd(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, d, e, tau, scratchpad, \ + scratchpad_size, dependencies); \ + } + +SYTRD_LAUNCHER_USM(float, LAPACKE_ssytrd_work) +SYTRD_LAUNCHER_USM(double, LAPACKE_dsytrd_work) + +#undef SYTRD_LAUNCHER_USM + +template +inline sycl::event sytrf(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, + std::int64_t* ipiv, T* s, std::int64_t scratchpad_size, + + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto ipiv_ = reinterpret_cast(ipiv); + auto s_ = reinterpret_cast(s); + + std::int64_t err = + 
func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda, ipiv_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + + return done; +} + +#define SYTRF_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event sytrf(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, std::int64_t* ipiv, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return sytrf(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, ipiv, scratchpad, scratchpad_size, \ + dependencies); \ + } + +SYTRF_LAUNCHER_USM(float, LAPACKE_ssytrf_work) +SYTRF_LAUNCHER_USM(double, LAPACKE_dsytrf_work) +SYTRF_LAUNCHER_USM(std::complex, LAPACKE_csytrf_work) +SYTRF_LAUNCHER_USM(std::complex, LAPACKE_zsytrf_work) + +#undef SYTRF_LAUNCHER_USM + +template +inline sycl::event trtrs(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, oneapi::math::transpose trans, + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, T* a, + std::int64_t lda, T* b, std::int64_t ldb, + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto b_ = reinterpret_cast(b); + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), get_operation(trans), + get_diag(diag), n, nrhs, a_, lda, b_, ldb); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + + return done; +} + +#define TRTRS_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event trtrs(sycl::queue& queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, \ + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, TYPE* a, \ + std::int64_t lda, TYPE* b, std::int64_t ldb, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const 
std::vector& dependencies) { \ + return trtrs(ROUTINE, #ROUTINE, queue, uplo, trans, diag, n, nrhs, a, lda, b, ldb, \ + dependencies); \ + } + +TRTRS_LAUNCHER_USM(float, LAPACKE_strtrs) +TRTRS_LAUNCHER_USM(double, LAPACKE_dtrtrs) +TRTRS_LAUNCHER_USM(std::complex, LAPACKE_ctrtrs) +TRTRS_LAUNCHER_USM(std::complex, LAPACKE_ztrtrs) + +#undef TRTRS_LAUNCHER_USM + +template +inline sycl::event ungbr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, std::int64_t k, + T* a, std::int64_t lda, T* tau, T* s, std::int64_t scratchpad_size, + + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, get_generate(vec), m, n, k, a_, lda, tau_, s_, + scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define UNGBR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event ungbr(sycl::queue& queue, oneapi::math::generate vec, std::int64_t m, \ + std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, TYPE* tau, \ + TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return ungbr(ROUTINE, #ROUTINE, queue, vec, m, n, k, a, lda, tau, scratchpad, \ + scratchpad_size, dependencies); \ + } + +UNGBR_LAUNCHER_USM(std::complex, LAPACKE_cungbr_work) +UNGBR_LAUNCHER_USM(std::complex, LAPACKE_zungbr_work) + +#undef UNGBR_LAUNCHER_USM + +template +inline sycl::event ungqr(Func func, const char* func_name, sycl::queue& queue, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* s, + std::int64_t scratchpad_size, + + const 
std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, k, a_, lda, tau_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define UNGQR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event ungqr(sycl::queue& queue, std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return ungqr(ROUTINE, #ROUTINE, queue, m, n, k, a, lda, tau, scratchpad, scratchpad_size, \ + dependencies); \ + } + +UNGQR_LAUNCHER_USM(std::complex, LAPACKE_cungqr_work) +UNGQR_LAUNCHER_USM(std::complex, LAPACKE_zungqr_work) + +#undef UNGQR_LAUNCHER_USM + +template +inline sycl::event ungtr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, T* a, std::int64_t lda, T* tau, + T* s, std::int64_t scratchpad_size, + + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto s_ = reinterpret_cast(s); + + std::int64_t err = + func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, a_, lda, tau_, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define UNGTR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event 
ungtr(sycl::queue& queue, oneapi::math::uplo uplo, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* scratchpad, std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return ungtr(ROUTINE, #ROUTINE, queue, uplo, n, a, lda, tau, scratchpad, scratchpad_size, \ + dependencies); \ + } + +UNGTR_LAUNCHER_USM(std::complex, LAPACKE_cungtr_work) +UNGTR_LAUNCHER_USM(std::complex, LAPACKE_zungtr_work) + +#undef UNGTR_LAUNCHER_USM + +template +inline sycl::event unmrq(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* s, std::int64_t scratchpad_size, + + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_, lda, tau_, c_, ldc, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define UNMRQ_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event unmrq(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return unmrq(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, \ + scratchpad, scratchpad_size, dependencies); \ + } + +UNMRQ_LAUNCHER_USM(std::complex, LAPACKE_cunmrq_work) 
+UNMRQ_LAUNCHER_USM(std::complex, LAPACKE_zunmrq_work) + +#undef UNMRQ_LAUNCHER_USM + +template +inline sycl::event unmqr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, std::int64_t m, + std::int64_t n, std::int64_t k, T* a, std::int64_t lda, T* tau, T* c, + std::int64_t ldc, T* s, std::int64_t scratchpad_size, + + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); + + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, + n, k, a_, lda, tau_, c_, ldc, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define UNMQR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event unmqr(sycl::queue& queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, TYPE* a, std::int64_t lda, \ + TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return unmqr(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, a, lda, tau, c, ldc, \ + scratchpad, scratchpad_size, dependencies); \ + } + +UNMQR_LAUNCHER_USM(std::complex, LAPACKE_cunmqr_work) +UNMQR_LAUNCHER_USM(std::complex, LAPACKE_zunmqr_work) + +#undef UNMQR_LAUNCHER_USM + +template +inline sycl::event unmtr(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, T* a, + std::int64_t lda, T* tau, T* c, std::int64_t ldc, T* s, + std::int64_t scratchpad_size, 
+ + const std::vector& dependencies) { + using ArmDataType = typename ArmEquivalentType::Type; + auto done = queue.submit([&](sycl::handler& cgh) { + int64_t num_events = dependencies.size(); + for (int64_t i = 0; i < num_events; i++) { + cgh.depends_on(dependencies[i]); + } + host_task(cgh, [=]() { + auto a_ = reinterpret_cast(a); + auto tau_ = reinterpret_cast(tau); + auto c_ = reinterpret_cast(c); + auto s_ = reinterpret_cast(s); + + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_fill_mode(uplo), + get_operation(trans), m, n, a_, lda, tau_, c_, ldc, s_, scratchpad_size); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }); + return done; +} + +#define UNMTR_LAUNCHER_USM(TYPE, ROUTINE) \ + sycl::event unmtr(sycl::queue& queue, oneapi::math::side side, oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, TYPE* a, \ + std::int64_t lda, TYPE* tau, TYPE* c, std::int64_t ldc, TYPE* scratchpad, \ + std::int64_t scratchpad_size, \ + const std::vector& dependencies) { \ + return unmtr(ROUTINE, #ROUTINE, queue, side, uplo, trans, m, n, a, lda, tau, c, ldc, \ + scratchpad, scratchpad_size, dependencies); \ + } + +UNMTR_LAUNCHER_USM(std::complex, LAPACKE_cunmtr_work) +UNMTR_LAUNCHER_USM(std::complex, LAPACKE_zunmtr_work) + +#undef UNMTR_LAUNCHER_USM + +// SCRATCHPAD APIs +// Calls the gebrd routine func with lwork = -1 (workspace query) inside a host task and waits for it; +// the routine's answer is left in *work_query. The host task throws armpl_lapacke_error if func returns nonzero. +template +inline void gebrd_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit( + [&](sycl::handler& + cgh) { // work_query is a plain pointer captured by value; no accessor is needed + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, m, n, nullptr, lda, nullptr, nullptr, + nullptr, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +// Defines the gebrd_scratchpad_size specialization for TYPE: runs the workspace query through ROUTINE +// and returns the queried value converted by cast_to_int_if_complex. +#define GEBRD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template 
<> \ + std::int64_t gebrd_scratchpad_size(sycl::queue & queue, std::int64_t m, std::int64_t n, \ + std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + gebrd_scratchpad_size(ROUTINE, #ROUTINE, queue, m, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +GEBRD_LAUNCHER_SCRATCH(float, LAPACKE_sgebrd_work) +GEBRD_LAUNCHER_SCRATCH(double, LAPACKE_dgebrd_work) +GEBRD_LAUNCHER_SCRATCH(std::complex, LAPACKE_cgebrd_work) +GEBRD_LAUNCHER_SCRATCH(std::complex, LAPACKE_zgebrd_work) + +#undef GEBRD_LAUNCHER_SCRATCH + +// Calls the gerqf routine func with lwork = -1 (workspace query) inside a host task and waits for it; +// the routine's answer is left in *work_query. The host task throws armpl_lapacke_error if func returns nonzero. +template +inline void gerqf_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, m, n, nullptr, lda, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +// Defines the gerqf_scratchpad_size specialization for TYPE: runs the workspace query through ROUTINE +// and returns the queried value converted by cast_to_int_if_complex. +#define GERQF_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t gerqf_scratchpad_size(sycl::queue & queue, std::int64_t m, std::int64_t n, \ + std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + gerqf_scratchpad_size(ROUTINE, #ROUTINE, queue, m, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +GERQF_LAUNCHER_SCRATCH(float, LAPACKE_sgerqf_work) +GERQF_LAUNCHER_SCRATCH(double, LAPACKE_dgerqf_work) +GERQF_LAUNCHER_SCRATCH(std::complex, LAPACKE_cgerqf_work) +GERQF_LAUNCHER_SCRATCH(std::complex, LAPACKE_zgerqf_work) + +#undef GERQF_LAUNCHER_SCRATCH + +template +inline void geqrf_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { 
+ std::int64_t err = + func(LAPACK_COL_MAJOR, m, n, nullptr, lda, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define GEQRF_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t geqrf_scratchpad_size(sycl::queue & queue, std::int64_t m, std::int64_t n, \ + std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + geqrf_scratchpad_size(ROUTINE, #ROUTINE, queue, m, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +GEQRF_LAUNCHER_SCRATCH(float, LAPACKE_sgeqrf_work) +GEQRF_LAUNCHER_SCRATCH(double, LAPACKE_dgeqrf_work) +GEQRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_cgeqrf_work) +GEQRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_zgeqrf_work) + +#undef GEQRF_LAUNCHER_SCRATCH + +template +inline void gesvd_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, + std::int64_t m, std::int64_t n, std::int64_t lda, + std::int64_t ldu, std::int64_t ldvt, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = 0; + if constexpr (is_complex) { + err = func(LAPACK_COL_MAJOR, get_jobsvd(jobu), get_jobsvd(jobvt), m, n, nullptr, + lda, nullptr, nullptr, ldu, nullptr, ldvt, work_query, -1, nullptr); + } + else { + err = func(LAPACK_COL_MAJOR, get_jobsvd(jobu), get_jobsvd(jobvt), m, n, nullptr, + lda, nullptr, nullptr, ldu, nullptr, ldvt, work_query, -1); + } + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define GESVD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t gesvd_scratchpad_size( \ + sycl::queue & queue, oneapi::math::jobsvd jobu, oneapi::math::jobsvd jobvt, \ + std::int64_t m, std::int64_t n, std::int64_t lda, std::int64_t ldu, std::int64_t ldvt) { \ + using ArmDataType = 
typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + gesvd_scratchpad_size(ROUTINE, #ROUTINE, queue, jobu, jobvt, m, n, lda, ldu, ldvt, \ + &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +GESVD_LAUNCHER_SCRATCH(float, LAPACKE_sgesvd_work) +GESVD_LAUNCHER_SCRATCH(double, LAPACKE_dgesvd_work) +GESVD_LAUNCHER_SCRATCH(std::complex, LAPACKE_cgesvd_work) +GESVD_LAUNCHER_SCRATCH(std::complex, LAPACKE_zgesvd_work) + +#undef GESVD_LAUNCHER_SCRATCH + +#define GETRF_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t getrf_scratchpad_size(sycl::queue & queue, std::int64_t m, std::int64_t n, \ + std::int64_t lda) { \ + return 0; \ + } + +GETRF_LAUNCHER_SCRATCH(float, LAPACKE_sgetrf_work) +GETRF_LAUNCHER_SCRATCH(double, LAPACKE_dgetrf_work) +GETRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_cgetrf_work) +GETRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_zgetrf_work) + +#undef GETRF_LAUNCHER_SCRATCH + +template +inline void getri_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + std::int64_t n, std::int64_t lda, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, n, nullptr, lda, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define GETRI_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t getri_scratchpad_size(sycl::queue & queue, std::int64_t n, \ + std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + getri_scratchpad_size(ROUTINE, #ROUTINE, queue, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +GETRI_LAUNCHER_SCRATCH(float, LAPACKE_sgetri_work) +GETRI_LAUNCHER_SCRATCH(double, LAPACKE_dgetri_work) +GETRI_LAUNCHER_SCRATCH(std::complex, LAPACKE_cgetri_work) +GETRI_LAUNCHER_SCRATCH(std::complex, 
LAPACKE_zgetri_work) + +#undef GETRI_LAUNCHER_SCRATCH + +#define GETRS_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t getrs_scratchpad_size(sycl::queue & queue, oneapi::math::transpose trans, \ + std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ + std::int64_t ldb) { \ + return 0; \ + } + +GETRS_LAUNCHER_SCRATCH(float, LAPACKE_sgetrs_work) +GETRS_LAUNCHER_SCRATCH(double, LAPACKE_dgetrs_work) +GETRS_LAUNCHER_SCRATCH(std::complex, LAPACKE_cgetrs_work) +GETRS_LAUNCHER_SCRATCH(std::complex, LAPACKE_zgetrs_work) + +#undef GETRS_LAUNCHER_SCRATCH + +// These routines use three separate work arrays, so querying a single scratchpad size is hard; stick to the self-contained LAPACKE call. +#define HEEVD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t heevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, \ + std::int64_t lda) { \ + return 0; \ + } + +HEEVD_LAUNCHER_SCRATCH(std::complex, LAPACKE_cheevd) +HEEVD_LAUNCHER_SCRATCH(std::complex, LAPACKE_zheevd) + +#undef HEEVD_LAUNCHER_SCRATCH + +#define HEGVD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t hegvd_scratchpad_size(sycl::queue & queue, std::int64_t itype, \ + oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda, std::int64_t ldb) { \ + return 0; \ + } + +HEGVD_LAUNCHER_SCRATCH(std::complex, LAPACKE_chegvd) +HEGVD_LAUNCHER_SCRATCH(std::complex, LAPACKE_zhegvd) + +#undef HEGVD_LAUNCHER_SCRATCH + +template +inline void hetrd_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nullptr, lda, + nullptr, nullptr, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + 
.wait(); +} + +#define HETRD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t hetrd_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + hetrd_scratchpad_size(ROUTINE, #ROUTINE, queue, uplo, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +HETRD_LAUNCHER_SCRATCH(std::complex, LAPACKE_chetrd_work) +HETRD_LAUNCHER_SCRATCH(std::complex, LAPACKE_zhetrd_work) + +#undef HETRD_LAUNCHER_SCRATCH + +template +inline void hetrf_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nullptr, lda, + nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define HETRF_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t hetrf_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + hetrf_scratchpad_size(ROUTINE, #ROUTINE, queue, uplo, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +HETRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_chetrf_work) +HETRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_zhetrf_work) +#undef HETRF_LAUNCHER_SCRATCH + +template +inline void orgbr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + 
std::int64_t err = func(LAPACK_COL_MAJOR, get_generate(vec), m, n, k, nullptr, lda, + nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define ORGBR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t orgbr_scratchpad_size(sycl::queue & queue, oneapi::math::generate vec, \ + std::int64_t m, std::int64_t n, std::int64_t k, \ + std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + orgbr_scratchpad_size(ROUTINE, #ROUTINE, queue, vec, m, n, k, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +ORGBR_LAUNCHER_SCRATCH(float, LAPACKE_sorgbr_work) +ORGBR_LAUNCHER_SCRATCH(double, LAPACKE_dorgbr_work) + +#undef ORGBR_LAUNCHER_SCRATCH + +template +inline void orgtr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nullptr, lda, + nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define ORGTR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t orgtr_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + orgtr_scratchpad_size(ROUTINE, #ROUTINE, queue, uplo, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +ORGTR_LAUNCHER_SCRATCH(float, LAPACKE_sorgtr_work) +ORGTR_LAUNCHER_SCRATCH(double, LAPACKE_dorgtr_work) + +#undef ORGTR_LAUNCHER_SCRATCH + +template +inline void orgqr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + std::int64_t m, std::int64_t n, 
std::int64_t k, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, m, n, k, nullptr, lda, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define ORGQR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t orgqr_scratchpad_size(sycl::queue & queue, std::int64_t m, std::int64_t n, \ + std::int64_t k, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + orgqr_scratchpad_size(ROUTINE, #ROUTINE, queue, m, n, k, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +ORGQR_LAUNCHER_SCRATCH(float, LAPACKE_sorgqr_work) +ORGQR_LAUNCHER_SCRATCH(double, LAPACKE_dorgqr_work) + +#undef ORGQR_LAUNCHER_SCRATCH + +template +inline void ormrq_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, n, k, + nullptr, lda, nullptr, nullptr, ldc, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define ORMRQ_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t ormrq_scratchpad_size( \ + sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + ormrq_scratchpad_size(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, 
lda, ldc, \ + &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +ORMRQ_LAUNCHER_SCRATCH(float, LAPACKE_sormrq_work) +ORMRQ_LAUNCHER_SCRATCH(double, LAPACKE_dormrq_work) + +#undef ORMRQ_LAUNCHER_SCRATCH + +template +inline void ormqr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, n, k, + nullptr, lda, nullptr, nullptr, ldc, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define ORMQR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t ormqr_scratchpad_size( \ + sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + ormqr_scratchpad_size(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, lda, ldc, \ + &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +ORMQR_LAUNCHER_SCRATCH(float, LAPACKE_sormqr_work) +ORMQR_LAUNCHER_SCRATCH(double, LAPACKE_dormqr_work) + +#undef ORMQR_LAUNCHER_SCRATCH + +template +inline void ormtr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), 
get_fill_mode(uplo), + get_operation(trans), m, n, nullptr, lda, nullptr, nullptr, + ldc, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define ORMTR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t ormtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ + oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, \ + std::int64_t n, std::int64_t lda, std::int64_t ldc) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + ormtr_scratchpad_size(ROUTINE, #ROUTINE, queue, side, uplo, trans, m, n, lda, ldc, \ + &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +ORMTR_LAUNCHER_SCRATCH(float, LAPACKE_sormtr_work) +ORMTR_LAUNCHER_SCRATCH(double, LAPACKE_dormtr_work) + +#undef ORMTR_LAUNCHER_SCRATCH + +#define POTRF_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t potrf_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda) { \ + return 0; \ + } + +POTRF_LAUNCHER_SCRATCH(float, LAPACKE_spotrf_work) +POTRF_LAUNCHER_SCRATCH(double, LAPACKE_dpotrf_work) +POTRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_cpotrf_work) +POTRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_zpotrf_work) + +#undef POTRF_LAUNCHER_SCRATCH + +#define POTRS_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t potrs_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ + std::int64_t ldb) { \ + return 0; \ + } + +POTRS_LAUNCHER_SCRATCH(float, LAPACKE_spotrs_work) +POTRS_LAUNCHER_SCRATCH(double, LAPACKE_dpotrs_work) +POTRS_LAUNCHER_SCRATCH(std::complex, LAPACKE_cpotrs_work) +POTRS_LAUNCHER_SCRATCH(std::complex, LAPACKE_zpotrs_work) + +#undef POTRS_LAUNCHER_SCRATCH + +#define POTRI_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t potri_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ 
+ std::int64_t n, std::int64_t lda) { \ + return 0; \ + } + +POTRI_LAUNCHER_SCRATCH(float, LAPACKE_spotri_work) +POTRI_LAUNCHER_SCRATCH(double, LAPACKE_dpotri_work) +POTRI_LAUNCHER_SCRATCH(std::complex, LAPACKE_cpotri_work) +POTRI_LAUNCHER_SCRATCH(std::complex, LAPACKE_zpotri_work) + +#undef POTRI_LAUNCHER_SCRATCH + +template +inline void sytrf_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nullptr, lda, + nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define SYTRF_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t sytrf_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + sytrf_scratchpad_size(ROUTINE, #ROUTINE, queue, uplo, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +SYTRF_LAUNCHER_SCRATCH(float, LAPACKE_ssytrf_work) +SYTRF_LAUNCHER_SCRATCH(double, LAPACKE_dsytrf_work) +SYTRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_csytrf_work) +SYTRF_LAUNCHER_SCRATCH(std::complex, LAPACKE_zsytrf_work) + +#undef SYTRF_LAUNCHER_SCRATCH + +#define SYEVD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t syevd_scratchpad_size(sycl::queue & queue, oneapi::math::job jobz, \ + oneapi::math::uplo uplo, std::int64_t n, \ + std::int64_t lda) { \ + return 0; \ + } + +SYEVD_LAUNCHER_SCRATCH(float, LAPACKE_ssyevd) +SYEVD_LAUNCHER_SCRATCH(double, LAPACKE_dsyevd) + +#undef SYEVD_LAUNCHER_SCRATCH + +#define SYGVD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t sygvd_scratchpad_size(sycl::queue & queue, std::int64_t 
itype, \ + oneapi::math::job jobz, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda, std::int64_t ldb) { \ + return 0; \ + } + +SYGVD_LAUNCHER_SCRATCH(float, LAPACKE_ssygvd) +SYGVD_LAUNCHER_SCRATCH(double, LAPACKE_dsygvd) + +#undef SYGVD_LAUNCHER_SCRATCH + +template +inline void sytrd_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nullptr, lda, + nullptr, nullptr, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define SYTRD_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t sytrd_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + sytrd_scratchpad_size(ROUTINE, #ROUTINE, queue, uplo, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +SYTRD_LAUNCHER_SCRATCH(float, LAPACKE_ssytrd_work) +SYTRD_LAUNCHER_SCRATCH(double, LAPACKE_dsytrd_work) + +#undef SYTRD_LAUNCHER_SCRATCH + +#define TRTRS_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t trtrs_scratchpad_size( \ + sycl::queue & queue, oneapi::math::uplo uplo, oneapi::math::transpose trans, \ + oneapi::math::diag diag, std::int64_t n, std::int64_t nrhs, std::int64_t lda, \ + std::int64_t ldb) { \ + return 0; \ + } + +TRTRS_LAUNCHER_SCRATCH(float, LAPACKE_strtrs) +TRTRS_LAUNCHER_SCRATCH(double, LAPACKE_dtrtrs) +TRTRS_LAUNCHER_SCRATCH(std::complex, LAPACKE_ctrtrs) +TRTRS_LAUNCHER_SCRATCH(std::complex, LAPACKE_ztrtrs) + +#undef TRTRS_LAUNCHER_SCRATCH + +template +inline void ungbr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + 
oneapi::math::generate vec, std::int64_t m, std::int64_t n, + std::int64_t k, std::int64_t lda, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_generate(vec), m, n, k, nullptr, lda, + nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define UNGBR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t ungbr_scratchpad_size(sycl::queue & queue, oneapi::math::generate vec, \ + std::int64_t m, std::int64_t n, std::int64_t k, \ + std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + ungbr_scratchpad_size(ROUTINE, #ROUTINE, queue, vec, m, n, k, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +UNGBR_LAUNCHER_SCRATCH(std::complex, LAPACKE_cungbr_work) +UNGBR_LAUNCHER_SCRATCH(std::complex, LAPACKE_zungbr_work) + +#undef UNGBR_LAUNCHER_SCRATCH + +template +inline void ungqr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, m, n, k, nullptr, lda, nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define UNGQR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t ungqr_scratchpad_size(sycl::queue & queue, std::int64_t m, std::int64_t n, \ + std::int64_t k, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + ungqr_scratchpad_size(ROUTINE, #ROUTINE, queue, m, n, k, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + 
+UNGQR_LAUNCHER_SCRATCH(std::complex, LAPACKE_cungqr_work) +UNGQR_LAUNCHER_SCRATCH(std::complex, LAPACKE_zungqr_work) + +#undef UNGQR_LAUNCHER_SCRATCH +template +inline void ungtr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::uplo uplo, std::int64_t n, std::int64_t lda, + TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_fill_mode(uplo), n, nullptr, lda, + nullptr, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define UNGTR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t ungtr_scratchpad_size(sycl::queue & queue, oneapi::math::uplo uplo, \ + std::int64_t n, std::int64_t lda) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + ungtr_scratchpad_size(ROUTINE, #ROUTINE, queue, uplo, n, lda, &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +UNGTR_LAUNCHER_SCRATCH(std::complex, LAPACKE_cungtr_work) +UNGTR_LAUNCHER_SCRATCH(std::complex, LAPACKE_zungtr_work) + +#undef UNGTR_LAUNCHER_SCRATCH + +template +inline void unmrq_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, n, k, + nullptr, lda, nullptr, nullptr, ldc, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define UNMRQ_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t unmrq_scratchpad_size( \ + sycl::queue & queue, 
oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + unmrq_scratchpad_size(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, lda, ldc, \ + &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +UNMRQ_LAUNCHER_SCRATCH(std::complex, LAPACKE_cunmrq_work) +UNMRQ_LAUNCHER_SCRATCH(std::complex, LAPACKE_zunmrq_work) + +#undef UNMRQ_LAUNCHER_SCRATCH + +template +inline void unmqr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::transpose trans, + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, + std::int64_t ldc, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = + func(LAPACK_COL_MAJOR, get_side_mode(side), get_operation(trans), m, n, k, + nullptr, lda, nullptr, nullptr, ldc, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define UNMQR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t unmqr_scratchpad_size( \ + sycl::queue & queue, oneapi::math::side side, oneapi::math::transpose trans, \ + std::int64_t m, std::int64_t n, std::int64_t k, std::int64_t lda, std::int64_t ldc) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + unmqr_scratchpad_size(ROUTINE, #ROUTINE, queue, side, trans, m, n, k, lda, ldc, \ + &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +UNMQR_LAUNCHER_SCRATCH(std::complex, LAPACKE_cunmqr_work) +UNMQR_LAUNCHER_SCRATCH(std::complex, LAPACKE_zunmqr_work) + +#undef UNMQR_LAUNCHER_SCRATCH + +template +inline void unmtr_scratchpad_size(Func func, const char* func_name, sycl::queue& queue, + oneapi::math::side side, oneapi::math::uplo 
uplo, + oneapi::math::transpose trans, std::int64_t m, std::int64_t n, + std::int64_t lda, std::int64_t ldc, TYPE* work_query) { + using ArmDataType = typename ArmEquivalentType::Type; + queue + .submit([&](sycl::handler& cgh) { + host_task(cgh, [=]() { + std::int64_t err = func(LAPACK_COL_MAJOR, get_side_mode(side), get_fill_mode(uplo), + get_operation(trans), m, n, nullptr, lda, nullptr, nullptr, + ldc, work_query, -1); + if (err != 0) { + throw armpl_lapacke_error(func_name, err); + } + }); + }) + .wait(); +} + +#define UNMTR_LAUNCHER_SCRATCH(TYPE, ROUTINE) \ + template <> \ + std::int64_t unmtr_scratchpad_size(sycl::queue & queue, oneapi::math::side side, \ + oneapi::math::uplo uplo, \ + oneapi::math::transpose trans, std::int64_t m, \ + std::int64_t n, std::int64_t lda, std::int64_t ldc) { \ + using ArmDataType = typename ArmEquivalentType::Type; \ + ArmDataType work_query; \ + unmtr_scratchpad_size(ROUTINE, #ROUTINE, queue, side, uplo, trans, m, n, lda, ldc, \ + &work_query); \ + return cast_to_int_if_complex(work_query); \ + } + +UNMTR_LAUNCHER_SCRATCH(std::complex, LAPACKE_cunmtr_work) +UNMTR_LAUNCHER_SCRATCH(std::complex, LAPACKE_zunmtr_work) + +#undef UNMTR_LAUNCHER_SCRATCH + +} // namespace armpl +} // namespace lapack +} // namespace math +} // namespace oneapi diff --git a/src/lapack/backends/armpl/armpl_wrappers_table_dyn.cpp b/src/lapack/backends/armpl/armpl_wrappers_table_dyn.cpp new file mode 100644 index 000000000..397c33e9b --- /dev/null +++ b/src/lapack/backends/armpl/armpl_wrappers_table_dyn.cpp @@ -0,0 +1,428 @@ +/******************************************************************************* +* Copyright 2025 SiPearl +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, +* software distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions +* and limitations under the License. +* +* +* SPDX-License-Identifier: Apache-2.0 +*******************************************************************************/ + +#include "armpl_common.hpp" +#include "oneapi/math/lapack/detail/armpl/onemath_lapack_armpl.hxx" +#include "lapack/function_table.hpp" + +#define WRAPPER_VERSION 1 + +extern "C" lapack_function_table_t onemath_lapack_table = { + WRAPPER_VERSION, +#define LAPACK_BACKEND armpl + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::heevd, + oneapi::math::lapack::armpl::heevd, + oneapi::math::lapack::armpl::hegvd, 
+ oneapi::math::lapack::armpl::hegvd, + oneapi::math::lapack::armpl::hetrd, + oneapi::math::lapack::armpl::hetrd, + oneapi::math::lapack::armpl::hetrf, + oneapi::math::lapack::armpl::hetrf, + oneapi::math::lapack::armpl::orgbr, + oneapi::math::lapack::armpl::orgbr, + oneapi::math::lapack::armpl::orgqr, + oneapi::math::lapack::armpl::orgqr, + oneapi::math::lapack::armpl::orgtr, + oneapi::math::lapack::armpl::orgtr, + oneapi::math::lapack::armpl::ormtr, + oneapi::math::lapack::armpl::ormtr, + oneapi::math::lapack::armpl::ormrq, + oneapi::math::lapack::armpl::ormrq, + oneapi::math::lapack::armpl::ormqr, + oneapi::math::lapack::armpl::ormqr, + oneapi::math::lapack::armpl::potrf, + oneapi::math::lapack::armpl::potrf, + oneapi::math::lapack::armpl::potrf, + oneapi::math::lapack::armpl::potrf, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::syevd, + oneapi::math::lapack::armpl::syevd, + oneapi::math::lapack::armpl::sygvd, + oneapi::math::lapack::armpl::sygvd, + oneapi::math::lapack::armpl::sytrd, + oneapi::math::lapack::armpl::sytrd, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::ungbr, + oneapi::math::lapack::armpl::ungbr, + oneapi::math::lapack::armpl::ungqr, + oneapi::math::lapack::armpl::ungqr, + oneapi::math::lapack::armpl::ungtr, + oneapi::math::lapack::armpl::ungtr, + oneapi::math::lapack::armpl::unmrq, + oneapi::math::lapack::armpl::unmrq, + oneapi::math::lapack::armpl::unmqr, + 
oneapi::math::lapack::armpl::unmqr, + oneapi::math::lapack::armpl::unmtr, + oneapi::math::lapack::armpl::unmtr, + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gebrd, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::gerqf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::geqrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getrf, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getri, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::getrs, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::gesvd, + oneapi::math::lapack::armpl::heevd, + oneapi::math::lapack::armpl::heevd, + oneapi::math::lapack::armpl::hegvd, + oneapi::math::lapack::armpl::hegvd, + oneapi::math::lapack::armpl::hetrd, + oneapi::math::lapack::armpl::hetrd, + oneapi::math::lapack::armpl::hetrf, + oneapi::math::lapack::armpl::hetrf, + oneapi::math::lapack::armpl::orgbr, + oneapi::math::lapack::armpl::orgbr, + oneapi::math::lapack::armpl::orgqr, + oneapi::math::lapack::armpl::orgqr, + oneapi::math::lapack::armpl::orgtr, + oneapi::math::lapack::armpl::orgtr, + oneapi::math::lapack::armpl::ormtr, + oneapi::math::lapack::armpl::ormtr, + oneapi::math::lapack::armpl::ormrq, + oneapi::math::lapack::armpl::ormrq, + oneapi::math::lapack::armpl::ormqr, + oneapi::math::lapack::armpl::ormqr, + oneapi::math::lapack::armpl::potrf, + 
oneapi::math::lapack::armpl::potrf, + oneapi::math::lapack::armpl::potrf, + oneapi::math::lapack::armpl::potrf, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potri, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::potrs, + oneapi::math::lapack::armpl::syevd, + oneapi::math::lapack::armpl::syevd, + oneapi::math::lapack::armpl::sygvd, + oneapi::math::lapack::armpl::sygvd, + oneapi::math::lapack::armpl::sytrd, + oneapi::math::lapack::armpl::sytrd, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::sytrf, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::trtrs, + oneapi::math::lapack::armpl::ungbr, + oneapi::math::lapack::armpl::ungbr, + oneapi::math::lapack::armpl::ungqr, + oneapi::math::lapack::armpl::ungqr, + oneapi::math::lapack::armpl::ungtr, + oneapi::math::lapack::armpl::ungtr, + oneapi::math::lapack::armpl::unmrq, + oneapi::math::lapack::armpl::unmrq, + oneapi::math::lapack::armpl::unmqr, + oneapi::math::lapack::armpl::unmqr, + oneapi::math::lapack::armpl::unmtr, + oneapi::math::lapack::armpl::unmtr, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrf_batch, + 
oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::orgqr_batch, + oneapi::math::lapack::armpl::orgqr_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::ungqr_batch, + oneapi::math::lapack::armpl::ungqr_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::orgqr_batch, + oneapi::math::lapack::armpl::orgqr_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::ungqr_batch, + oneapi::math::lapack::armpl::ungqr_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + 
oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::geqrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getrf_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getri_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::getrs_batch, + oneapi::math::lapack::armpl::orgqr_batch, + oneapi::math::lapack::armpl::orgqr_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrf_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::potrs_batch, + oneapi::math::lapack::armpl::ungqr_batch, + oneapi::math::lapack::armpl::ungqr_batch, + oneapi::math::lapack::armpl::gebrd_scratchpad_size, + oneapi::math::lapack::armpl::gebrd_scratchpad_size, + oneapi::math::lapack::armpl::gebrd_scratchpad_size>, + oneapi::math::lapack::armpl::gebrd_scratchpad_size>, + oneapi::math::lapack::armpl::gerqf_scratchpad_size, + oneapi::math::lapack::armpl::gerqf_scratchpad_size, + oneapi::math::lapack::armpl::gerqf_scratchpad_size>, + oneapi::math::lapack::armpl::gerqf_scratchpad_size>, + oneapi::math::lapack::armpl::geqrf_scratchpad_size, + oneapi::math::lapack::armpl::geqrf_scratchpad_size, + oneapi::math::lapack::armpl::geqrf_scratchpad_size>, + oneapi::math::lapack::armpl::geqrf_scratchpad_size>, + oneapi::math::lapack::armpl::gesvd_scratchpad_size, + oneapi::math::lapack::armpl::gesvd_scratchpad_size, + oneapi::math::lapack::armpl::gesvd_scratchpad_size>, + 
oneapi::math::lapack::armpl::gesvd_scratchpad_size>, + oneapi::math::lapack::armpl::getrf_scratchpad_size, + oneapi::math::lapack::armpl::getrf_scratchpad_size, + oneapi::math::lapack::armpl::getrf_scratchpad_size>, + oneapi::math::lapack::armpl::getrf_scratchpad_size>, + oneapi::math::lapack::armpl::getri_scratchpad_size, + oneapi::math::lapack::armpl::getri_scratchpad_size, + oneapi::math::lapack::armpl::getri_scratchpad_size>, + oneapi::math::lapack::armpl::getri_scratchpad_size>, + oneapi::math::lapack::armpl::getrs_scratchpad_size, + oneapi::math::lapack::armpl::getrs_scratchpad_size, + oneapi::math::lapack::armpl::getrs_scratchpad_size>, + oneapi::math::lapack::armpl::getrs_scratchpad_size>, + oneapi::math::lapack::armpl::heevd_scratchpad_size>, + oneapi::math::lapack::armpl::heevd_scratchpad_size>, + oneapi::math::lapack::armpl::hegvd_scratchpad_size>, + oneapi::math::lapack::armpl::hegvd_scratchpad_size>, + oneapi::math::lapack::armpl::hetrd_scratchpad_size>, + oneapi::math::lapack::armpl::hetrd_scratchpad_size>, + oneapi::math::lapack::armpl::hetrf_scratchpad_size>, + oneapi::math::lapack::armpl::hetrf_scratchpad_size>, + oneapi::math::lapack::armpl::orgbr_scratchpad_size, + oneapi::math::lapack::armpl::orgbr_scratchpad_size, + oneapi::math::lapack::armpl::orgtr_scratchpad_size, + oneapi::math::lapack::armpl::orgtr_scratchpad_size, + oneapi::math::lapack::armpl::orgqr_scratchpad_size, + oneapi::math::lapack::armpl::orgqr_scratchpad_size, + oneapi::math::lapack::armpl::ormrq_scratchpad_size, + oneapi::math::lapack::armpl::ormrq_scratchpad_size, + oneapi::math::lapack::armpl::ormqr_scratchpad_size, + oneapi::math::lapack::armpl::ormqr_scratchpad_size, + oneapi::math::lapack::armpl::ormtr_scratchpad_size, + oneapi::math::lapack::armpl::ormtr_scratchpad_size, + oneapi::math::lapack::armpl::potrf_scratchpad_size, + oneapi::math::lapack::armpl::potrf_scratchpad_size, + oneapi::math::lapack::armpl::potrf_scratchpad_size>, + 
oneapi::math::lapack::armpl::potrf_scratchpad_size>, + oneapi::math::lapack::armpl::potrs_scratchpad_size, + oneapi::math::lapack::armpl::potrs_scratchpad_size, + oneapi::math::lapack::armpl::potrs_scratchpad_size>, + oneapi::math::lapack::armpl::potrs_scratchpad_size>, + oneapi::math::lapack::armpl::potri_scratchpad_size, + oneapi::math::lapack::armpl::potri_scratchpad_size, + oneapi::math::lapack::armpl::potri_scratchpad_size>, + oneapi::math::lapack::armpl::potri_scratchpad_size>, + oneapi::math::lapack::armpl::sytrf_scratchpad_size, + oneapi::math::lapack::armpl::sytrf_scratchpad_size, + oneapi::math::lapack::armpl::sytrf_scratchpad_size>, + oneapi::math::lapack::armpl::sytrf_scratchpad_size>, + oneapi::math::lapack::armpl::syevd_scratchpad_size, + oneapi::math::lapack::armpl::syevd_scratchpad_size, + oneapi::math::lapack::armpl::sygvd_scratchpad_size, + oneapi::math::lapack::armpl::sygvd_scratchpad_size, + oneapi::math::lapack::armpl::sytrd_scratchpad_size, + oneapi::math::lapack::armpl::sytrd_scratchpad_size, + oneapi::math::lapack::armpl::trtrs_scratchpad_size, + oneapi::math::lapack::armpl::trtrs_scratchpad_size, + oneapi::math::lapack::armpl::trtrs_scratchpad_size>, + oneapi::math::lapack::armpl::trtrs_scratchpad_size>, + oneapi::math::lapack::armpl::ungbr_scratchpad_size>, + oneapi::math::lapack::armpl::ungbr_scratchpad_size>, + oneapi::math::lapack::armpl::ungqr_scratchpad_size>, + oneapi::math::lapack::armpl::ungqr_scratchpad_size>, + oneapi::math::lapack::armpl::ungtr_scratchpad_size>, + oneapi::math::lapack::armpl::ungtr_scratchpad_size>, + oneapi::math::lapack::armpl::unmrq_scratchpad_size>, + oneapi::math::lapack::armpl::unmrq_scratchpad_size>, + oneapi::math::lapack::armpl::unmqr_scratchpad_size>, + oneapi::math::lapack::armpl::unmqr_scratchpad_size>, + oneapi::math::lapack::armpl::unmtr_scratchpad_size>, + oneapi::math::lapack::armpl::unmtr_scratchpad_size>, + oneapi::math::lapack::armpl::getrf_batch_scratchpad_size, + 
oneapi::math::lapack::armpl::getrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::getrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getri_batch_scratchpad_size, + oneapi::math::lapack::armpl::getri_batch_scratchpad_size, + oneapi::math::lapack::armpl::getri_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getri_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::orgqr_batch_scratchpad_size, + oneapi::math::lapack::armpl::orgqr_batch_scratchpad_size, + oneapi::math::lapack::armpl::ungqr_batch_scratchpad_size>, + oneapi::math::lapack::armpl::ungqr_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::getrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::getrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getri_batch_scratchpad_size, + oneapi::math::lapack::armpl::getri_batch_scratchpad_size, + 
oneapi::math::lapack::armpl::getri_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getri_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::getrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::geqrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::orgqr_batch_scratchpad_size, + oneapi::math::lapack::armpl::orgqr_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::potrf_batch_scratchpad_size>, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::potrs_batch_scratchpad_size>, + oneapi::math::lapack::armpl::ungqr_batch_scratchpad_size>, + oneapi::math::lapack::armpl::ungqr_batch_scratchpad_size> +#undef LAPACK_BACKEND +}; diff --git a/tests/unit_tests/CMakeLists.txt b/tests/unit_tests/CMakeLists.txt index a9df19787..a4a496410 100644 --- a/tests/unit_tests/CMakeLists.txt +++ b/tests/unit_tests/CMakeLists.txt @@ -171,7 +171,12 @@ foreach(domain ${TEST_TARGET_DOMAINS}) add_dependencies(test_main_${domain}_ct onemath_${domain}_rocsolver) list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_rocsolver) endif() - + + if(domain STREQUAL "lapack" AND ENABLE_ARMPL_BACKEND) + add_dependencies(test_main_${domain}_ct onemath_${domain}_armpl) + list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_armpl) + endif() + if(domain STREQUAL "rng" AND 
ENABLE_CURAND_BACKEND) add_dependencies(test_main_${domain}_ct onemath_${domain}_curand) list(APPEND ONEMATH_LIBRARIES_${domain} onemath_${domain}_curand)